From e6a8ca816bf2bf5c20bb5ada92739c1677f1e397 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 17 Oct 2025 20:06:03 -0400 Subject: [PATCH 01/10] DOC: Set remaining __module__ attributes --- pandas/_config/config.py | 1 + pandas/_libs/missing.pyx | 2 +- pandas/_libs/tslibs/nattype.pyx | 2 +- pandas/_libs/tslibs/np_datetime.pyx | 2 + pandas/_libs/tslibs/offsets.pyx | 2 + pandas/_libs/tslibs/period.pyx | 1 + pandas/_typing.py | 12 +++++ pandas/core/apply.py | 1 + pandas/core/arrays/arrow/array.py | 1 + pandas/core/arrays/base.py | 2 + pandas/core/arrays/boolean.py | 1 + pandas/core/arrays/categorical.py | 1 + pandas/core/arrays/datetimes.py | 1 + pandas/core/arrays/floating.py | 1 + pandas/core/arrays/integer.py | 1 + pandas/core/arrays/interval.py | 1 + pandas/core/arrays/numpy_.py | 1 + pandas/core/arrays/period.py | 1 + pandas/core/arrays/sparse/array.py | 1 + pandas/core/arrays/string_.py | 1 + pandas/core/arrays/string_arrow.py | 1 + pandas/core/arrays/timedeltas.py | 1 + pandas/core/col.py | 1 + pandas/core/dtypes/base.py | 1 + pandas/core/flags.py | 1 + pandas/core/groupby/grouper.py | 1 + pandas/core/indexers/objects.py | 3 ++ pandas/core/indexes/frozen.py | 1 + pandas/core/indexes/range.py | 1 + pandas/core/indexing.py | 2 - pandas/core/interchange/dataframe_protocol.py | 1 + pandas/core/resample.py | 5 +++ pandas/core/window/ewm.py | 2 + pandas/core/window/expanding.py | 2 + pandas/core/window/rolling.py | 3 ++ pandas/io/json/_json.py | 1 + pandas/io/sas/sasreader.py | 1 + pandas/io/stata.py | 1 + pandas/tests/api/test_api.py | 44 +++++++++++++++++++ pandas/util/version/__init__.py | 1 + 40 files changed, 106 insertions(+), 4 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 6d9e2a69abf47..4d0b7598cf72d 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -101,6 +101,7 @@ class RegisteredOption(NamedTuple): class OptionError(AttributeError, KeyError): + __module__ = "pandas.errors" """ Exception 
raised for pandas.options. diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index a7aea7e766304..5673b2d4cc393 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -393,7 +393,7 @@ class NAType(C_NAType): >>> True | pd.NA True """ - __module__ = "pandas" + __module__ = "pandas.api.typing" _instance = None diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 3e22c13e37f76..72a6967b6a313 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -372,7 +372,7 @@ class NaTType(_NaT): 1 NaT """ - __module__ = "pandas" + __module__ = "pandas.api.typing" def __new__(cls): cdef _NaT base diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 1b7f04fe17238..0fc7a6945d2e0 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -192,6 +192,7 @@ class OutOfBoundsDatetime(ValueError): OutOfBoundsDatetime: Parsing "08335394550" to datetime overflows, at position 0 """ + __module__ = "pandas.errors" pass @@ -212,6 +213,7 @@ class OutOfBoundsTimedelta(ValueError): OutOfBoundsTimedelta: Cannot cast 139999 days 00:00:00 to unit='ns' without overflow. 
""" + __module__ = "pandas.errors" # Timedelta analogue to OutOfBoundsDatetime pass diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index a44d819c7899a..ad3fb72d31559 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1819,6 +1819,8 @@ class DateOffset(RelativeDeltaOffset, metaclass=OffsetMeta): >>> ts + pd.DateOffset(hour=8) Timestamp('2017-01-01 08:10:11') """ + __module__ = "pandas" + def __setattr__(self, name, value): raise AttributeError("DateOffset objects are immutable.") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 95a18f8cb2cad..d415bb49e491f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1627,6 +1627,7 @@ DIFFERENT_FREQ = ("Input has different freq={other_freq} " class IncompatibleFrequency(TypeError): + __module__ = "pandas.errors" """ Raised when trying to compare or operate between Periods with different frequencies. diff --git a/pandas/_typing.py b/pandas/_typing.py index c9af531fd90b7..f23dc529f2f83 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -107,6 +107,8 @@ class SequenceNotStr(Protocol[_T_co]): + __module__ = "pandas.api.typing.aliases" + @overload def __getitem__(self, index: SupportsIndex, /) -> _T_co: ... @@ -278,12 +280,16 @@ def tell(self) -> int: class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]): + __module__ = "pandas.api.typing.aliases" + def read(self, n: int = ..., /) -> AnyStr_co: # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File ... class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]): + __module__ = "pandas.api.typing.aliases" + def write(self, b: AnyStr_contra, /) -> Any: # for gzip.GzipFile, bz2.BZ2File ... @@ -294,14 +300,20 @@ def flush(self) -> Any: class ReadPickleBuffer(ReadBuffer[bytes], Protocol): + __module__ = "pandas.api.typing.aliases" + def readline(self) -> bytes: ... 
class WriteExcelBuffer(WriteBuffer[bytes], Protocol): + __module__ = "pandas.api.typing.aliases" + def truncate(self, size: int | None = ..., /) -> int: ... class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol): + __module__ = "pandas.api.typing.aliases" + def __iter__(self) -> Iterator[AnyStr_co]: # for engine=python ... diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b305cbfaa3a1e..778c58e8e6289 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -76,6 +76,7 @@ class BaseExecutionEngine(abc.ABC): + __module__ = "pandas.api.executors" """ Base class for execution engines for map and apply methods. diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 63052c4f4fea9..601ec0a91b413 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -246,6 +246,7 @@ class ArrowExtensionArray( ExtensionArraySupportsAnyAll, ArrowStringArrayMixin, ): + __module__ = "pandas.arrays" """ Pandas ExtensionArray backed by a PyArrow ChunkedArray. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e8ca51ef92a94..fb253e085cafc 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -106,6 +106,7 @@ class ExtensionArray: + __module__ = "pandas.api.extensions" """ Abstract base class for custom 1-D array types. @@ -2787,6 +2788,7 @@ def _add_logical_ops(cls) -> None: class ExtensionScalarOpsMixin(ExtensionOpsMixin): + __module__ = "pandas.api.extensions" """ A mixin for defining ops on an ExtensionArray. diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index aca2cafe80889..b993839d6605d 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -246,6 +246,7 @@ def coerce_to_array( class BooleanArray(BaseMaskedArray): + __module__ = "pandas.arrays" """ Array of boolean (True/False) data with missing values. 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d59499ed99c75..c7cf1934d08b7 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -246,6 +246,7 @@ def contains(cat, key, container) -> bool: class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin): + __module__ = "pandas" """ Represent a categorical variable in classic R / S-plus fashion. diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 64d0347aa815e..cc6c94ff1493f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -172,6 +172,7 @@ def f(self): class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): + __module__ = "pandas.arrays" """ Pandas ExtensionArray for tz-naive or tz-aware datetime data. diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index ed6ed6b22ad48..810a0e6cff1d9 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -64,6 +64,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr class FloatingArray(NumericArray): + __module__ = "pandas.arrays" """ Array of floating (optional missing) values. diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 366b508f9d400..e89b67e608d0b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -72,6 +72,7 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr class IntegerArray(NumericArray): + __module__ = "pandas.arrays" """ Array of integer (optional missing) values. 
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index ace868bda52d3..f4ee018665bfc 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -177,6 +177,7 @@ class IntervalArray(IntervalMixin, ExtensionArray): + __module__ = "pandas.arrays" """ Pandas array for interval data that are closed on the same side. diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index fab51ffa56919..eca47d3c9657f 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -57,6 +57,7 @@ class NumpyExtensionArray( NDArrayBackedExtensionArray, ObjectStringArrayMixin, ): + __module__ = "pandas.arrays" """ A pandas ExtensionArray for NumPy data. diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 90388336ba83d..2f50d72ce95ad 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -123,6 +123,7 @@ def f(self): # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] + __module__ = "pandas.arrays" """ Pandas ExtensionArray for storing Period data. diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index e6ff67af78700..98afa7f183f4b 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -300,6 +300,7 @@ def _wrap_result( class SparseArray(OpsMixin, PandasObject, ExtensionArray): + __module__ = "pandas.arrays" """ An ExtensionArray for storing sparse data. 
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index efcfddbebb0d4..a6fe8fd2cadb8 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -551,6 +551,7 @@ def view(self, dtype: Dtype | None = None) -> Self: # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] + __module__ = "pandas.arrays" """ Extension array for string data. diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 9046d83dcc09f..7ff7905bfa5a6 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -82,6 +82,7 @@ def _is_string_view(typ): class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringArray): + __module__ = "pandas.arrays" """ Extension array for string data in a ``pyarrow.ChunkedArray``. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 2b942041edf89..fecadf74fe0ba 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -104,6 +104,7 @@ def f(self) -> np.ndarray: class TimedeltaArray(dtl.TimelikeOps): + __module__ = "pandas.arrays" """ Pandas ExtensionArray for timedelta data. diff --git a/pandas/core/col.py b/pandas/core/col.py index eec1d241df92d..194dbf2cbfd57 100644 --- a/pandas/core/col.py +++ b/pandas/core/col.py @@ -70,6 +70,7 @@ def _pretty_print_args_kwargs(*args: Any, **kwargs: Any) -> str: class Expression: + __module__ = "pandas.api.typing" """ Class representing a deferred column. diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index ae48ad153e2d6..22ab6fa0b9d9d 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -42,6 +42,7 @@ class ExtensionDtype: + __module__ = "pandas.api.extensions" """ A custom data type, to be paired with an ExtensionArray. 
diff --git a/pandas/core/flags.py b/pandas/core/flags.py index eceb86dc61d9f..52436eed55a15 100644 --- a/pandas/core/flags.py +++ b/pandas/core/flags.py @@ -8,6 +8,7 @@ class Flags: + __module__ = "pandas" """ Flags that apply to pandas objects. diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 95bb51f29db40..b2200469a8dec 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -64,6 +64,7 @@ class Grouper: + __module__ = "pandas" """ A Grouper allows the user to specify a groupby instruction for an object. diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 99130da641efb..3e6d32637cfa5 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -17,6 +17,7 @@ class BaseIndexer: + __module__ = "pandas.api.indexers" """ Base class for window bounds calculations. @@ -211,6 +212,7 @@ def get_window_bounds( class VariableOffsetWindowIndexer(BaseIndexer): + __module__ = "pandas.api.indexers" """ Calculate window boundaries based on a non-fixed offset such as a BusinessDay. @@ -434,6 +436,7 @@ def get_window_bounds( class FixedForwardWindowIndexer(BaseIndexer): + __module__ = "pandas.api.indexers" """ Creates window boundaries for fixed-length windows that include the current row. 
diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index f75699a9b745f..bb6acd18ca014 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -20,6 +20,7 @@ class FrozenList(PandasObject, list): + __module__ = "pandas.api.typing" """ Container that doesn't allow setting item *but* because it's technically hashable, will be used diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 4560d3cc3479a..81c4871bf9862 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -77,6 +77,7 @@ def min_fitting_element(start: int, step: int, lower_limit: int) -> int: @set_module("pandas") class RangeIndex(Index): + __module__ = "pandas" """ Immutable Index implementing a monotonic integer range. diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 9246309c0c7f1..2dbf04d8eb21a 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -30,7 +30,6 @@ from pandas.errors.cow import _chained_assignment_msg from pandas.util._decorators import ( doc, - set_module, ) from pandas.core.dtypes.cast import ( @@ -104,7 +103,6 @@ # the public IndexSlicerMaker -@set_module("pandas") class _IndexSlice: """ Create an object to more easily perform multi-index slicing. diff --git a/pandas/core/interchange/dataframe_protocol.py b/pandas/core/interchange/dataframe_protocol.py index 95e7b6a26f93a..a3c8cf7f75fe6 100644 --- a/pandas/core/interchange/dataframe_protocol.py +++ b/pandas/core/interchange/dataframe_protocol.py @@ -363,6 +363,7 @@ def get_buffers(self) -> ColumnBuffers: class DataFrame(ABC): + __module__ = "pandas.api.interchange" """ A data frame class, with only the methods required by the interchange protocol defined. 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py index c85f6b36f0947..c63586c942f35 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -113,6 +113,7 @@ class Resampler(BaseGroupBy, PandasObject): + __module__ = "pandas.api.typing" """ Class for resampling datetimelike data, a groupby-like operation. See aggregate, transform, and apply functions on this object. @@ -2169,6 +2170,7 @@ def _wrap_result(self, result): class DatetimeIndexResamplerGroupby( # type: ignore[misc] _GroupByMixin, DatetimeIndexResampler ): + __module__ = "pandas.api.typing" """ Provides a resample of a groupby implementation """ @@ -2270,6 +2272,7 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): class PeriodIndexResamplerGroupby( # type: ignore[misc] _GroupByMixin, PeriodIndexResampler ): + __module__ = "pandas.api.typing" """ Provides a resample of a groupby implementation. """ @@ -2306,6 +2309,7 @@ def _adjust_binner_for_upsample(self, binner): class TimedeltaIndexResamplerGroupby( # type: ignore[misc] _GroupByMixin, TimedeltaIndexResampler ): + __module__ = "pandas.api.typing" """ Provides a resample of a groupby implementation. """ @@ -2345,6 +2349,7 @@ def get_resampler_for_grouping( class TimeGrouper(Grouper): + __module__ = "pandas.api.typing" """ Custom groupby class for time-interval grouping. diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 73e4de6ea6208..9e57738d620c7 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -130,6 +130,7 @@ def _calculate_deltas( class ExponentialMovingWindow(BaseWindow): + __module__ = "pandas.api.typing" r""" Provide exponentially weighted (EW) calculations. @@ -903,6 +904,7 @@ def _cov(X, Y): class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow): + __module__ = "pandas.api.typing" """ Provide an exponential moving window groupby implementation. 
""" diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index afab2295a8f69..b194ef9c674de 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -38,6 +38,7 @@ class Expanding(RollingAndExpandingMixin): + __module__ = "pandas.api.typing" """ Provide expanding window calculations. @@ -1451,6 +1452,7 @@ def corr( class ExpandingGroupby(BaseWindowGroupby, Expanding): + __module__ = "pandas.api.typing" """ Provide a expanding groupby implementation. """ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d3c417a008916..915c1f0d45572 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -856,6 +856,7 @@ def _gotitem(self, key, ndim, subset=None): class Window(BaseWindow): + __module__ = "pandas.api.typing" """ Provide rolling window calculations. @@ -1968,6 +1969,7 @@ def corr_func(x, y): class Rolling(RollingAndExpandingMixin): + __module__ = "pandas.api.typing" _attributes: list[str] = [ "window", "min_periods", @@ -3530,6 +3532,7 @@ def corr( class RollingGroupby(BaseWindowGroupby, Rolling): + __module__ = "pandas.api.typing" """ Provide a rolling groupby implementation. """ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 481f6a3a0aa61..d258767fc2e70 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -821,6 +821,7 @@ def read_json( class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): + __module__ = "pandas.api.typing" """ JsonReader provides an interface for reading in a JSON file. diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 46b22310cbfca..1daff7b3d0c7b 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -38,6 +38,7 @@ class SASReader(Iterator["DataFrame"], ABC): + __module__ = "pandas.api.typing" """ Abstract class for XportReader and SAS7BDATReader. 
""" diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1a62427b08057..1f953650365ef 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1019,6 +1019,7 @@ def __init__(self) -> None: class StataReader(StataParser, abc.Iterator): + __module__ = "pandas.api.typing" __doc__ = _stata_reader_doc _path_or_buf: IO[bytes] # Will be assigned by `_open_file`. diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b347e24bc5268..e338ad46df537 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -564,3 +564,47 @@ def test_set_module(): assert pd.read_xml.__module__ == "pandas" assert api.typing.SeriesGroupBy.__module__ == "pandas.api.typing" assert api.typing.DataFrameGroupBy.__module__ == "pandas.api.typing" + + +import importlib +import inspect +import pathlib +import pkgutil + + +def get_classes(module): + classes = [] + + for name, obj in inspect.getmembers(module): + if inspect.isclass(obj): + classes.append(obj) + + paths = [str(pathlib.Path(module.__file__).parent)] + for _, submodule_name, is_pkg in pkgutil.walk_packages( + paths, module.__name__ + "." + ): + try: + submodule = importlib.import_module(submodule_name) + classes.extend(get_classes(submodule)) + except ImportError: + # pass + raise + return classes + + +def test_module_attribute(): + # Check that each class pandas defines can be imported from + # the __module__ attribute + classes = get_classes(pd) + assert len(classes) > 100, len(classes) + failures = [] + for klass in classes: + if klass.__name__ == "ellipsis": + # This is a weird one. 
+ continue + try: + m = importlib.import_module(klass.__module__) + getattr(m, klass.__name__) + except AttributeError: + failures.append(klass) + assert len(failures) == 0, failures diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 15696c9292eda..412a606bb023e 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -111,6 +111,7 @@ def parse(version: str) -> Version: # The docstring is from an older version of the packaging library to avoid # errors in the docstring validation. class InvalidVersion(ValueError): + __module__ = "pandas.errors" """ An invalid version was found, users should refer to PEP 440. From 9881ae090c5bc5daa10eb9bf97d5ece026e2d2e3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 17 Oct 2025 20:06:38 -0400 Subject: [PATCH 02/10] Cleanup --- pandas/tests/api/test_api.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index e338ad46df537..07c29a01ea948 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,5 +1,10 @@ from __future__ import annotations +import importlib +import inspect +import pathlib +import pkgutil + import pytest import pandas as pd @@ -566,12 +571,6 @@ def test_set_module(): assert api.typing.DataFrameGroupBy.__module__ == "pandas.api.typing" -import importlib -import inspect -import pathlib -import pkgutil - - def get_classes(module): classes = [] @@ -587,8 +586,7 @@ def get_classes(module): submodule = importlib.import_module(submodule_name) classes.extend(get_classes(submodule)) except ImportError: - # pass - raise + pass return classes From a10b86bd37430d1e688dba9cb1985832fa4cca5d Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 17 Oct 2025 20:08:14 -0400 Subject: [PATCH 03/10] Mark test as slow --- pandas/tests/api/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/api/test_api.py 
b/pandas/tests/api/test_api.py index 07c29a01ea948..b41b0aa7ad839 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -590,6 +590,7 @@ def get_classes(module): return classes +@pytest.mark.slow def test_module_attribute(): # Check that each class pandas defines can be imported from # the __module__ attribute From 3646ea650e898bb07f93c46faa0efd6146b705f7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 18 Oct 2025 09:24:14 -0400 Subject: [PATCH 04/10] More changes --- doc/source/reference/groupby.rst | 4 +- doc/source/reference/resampling.rst | 2 +- doc/source/reference/window.rst | 11 +++-- doc/source/user_guide/enhancingperf.rst | 2 +- pandas/_config/config.py | 4 ++ pandas/_libs/lib.pyx | 2 + pandas/_libs/missing.pyx | 1 + pandas/_libs/tslibs/nattype.pyx | 1 + pandas/core/arrays/base.py | 2 +- pandas/core/dtypes/common.py | 20 ++++---- pandas/core/indexing.py | 1 + pandas/core/window/rolling.py | 8 +-- pandas/plotting/_core.py | 2 + pandas/plotting/_misc.py | 1 + pandas/tests/api/test_api.py | 66 ++++++++++++++++--------- 15 files changed, 82 insertions(+), 45 deletions(-) diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 004651ac0074f..181ae030e1227 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -5,7 +5,7 @@ ======= GroupBy ======= -.. currentmodule:: pandas.core.groupby +.. currentmodule:: pandas.api.typing :class:`pandas.api.typing.DataFrameGroupBy` and :class:`pandas.api.typing.SeriesGroupBy` instances are returned by groupby calls :func:`pandas.DataFrame.groupby` and @@ -40,7 +40,7 @@ Function application helper NamedAgg -.. currentmodule:: pandas.core.groupby +.. 
currentmodule:: pandas.api.typing Function application -------------------- diff --git a/doc/source/reference/resampling.rst b/doc/source/reference/resampling.rst index 2e0717081b129..b6cc8c6a1addb 100644 --- a/doc/source/reference/resampling.rst +++ b/doc/source/reference/resampling.rst @@ -5,7 +5,7 @@ ========== Resampling ========== -.. currentmodule:: pandas.core.resample +.. currentmodule:: pandas.api.typing :class:`pandas.api.typing.Resampler` instances are returned by resample calls: :func:`pandas.DataFrame.resample`, :func:`pandas.Series.resample`. diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst index 2bd63f02faf69..80ecf236a0daf 100644 --- a/doc/source/reference/window.rst +++ b/doc/source/reference/window.rst @@ -17,7 +17,7 @@ calls: :func:`pandas.DataFrame.ewm` and :func:`pandas.Series.ewm`. Rolling window functions ------------------------ -.. currentmodule:: pandas.core.window.rolling +.. currentmodule:: pandas.api.typing .. autosummary:: :toctree: api/ @@ -48,7 +48,8 @@ Rolling window functions Weighted window functions ------------------------- -.. currentmodule:: pandas.core.window.rolling +.. currentmodule:: pandas.api.typing + .. autosummary:: :toctree: api/ @@ -62,7 +63,8 @@ Weighted window functions Expanding window functions -------------------------- -.. currentmodule:: pandas.core.window.expanding +.. currentmodule:: pandas.api.typing + .. autosummary:: :toctree: api/ @@ -93,7 +95,8 @@ Expanding window functions Exponentially-weighted window functions --------------------------------------- -.. currentmodule:: pandas.core.window.ewm +.. currentmodule:: pandas.api.typing + .. 
autosummary:: :toctree: api/ diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst index 9c37f317a805e..a500fd6baac2c 100644 --- a/doc/source/user_guide/enhancingperf.rst +++ b/doc/source/user_guide/enhancingperf.rst @@ -455,7 +455,7 @@ by evaluate arithmetic and boolean expression all at once for large :class:`~pan :func:`~pandas.eval` is many orders of magnitude slower for smaller expressions or objects than plain Python. A good rule of thumb is to only use :func:`~pandas.eval` when you have a - :class:`~pandas.core.frame.DataFrame` with more than 10,000 rows. + :class:`~pandas.DataFrame` with more than 10,000 rows. Supported syntax ~~~~~~~~~~~~~~~~ diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 4d0b7598cf72d..62cfd9968412f 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -412,6 +412,9 @@ def __init__(self, d: dict[str, Any], prefix: str = "") -> None: object.__setattr__(self, "prefix", prefix) def __setattr__(self, key: str, val: Any) -> None: + if key == "__module__": + super().__setattr__(key, val) + return prefix = object.__getattribute__(self, "prefix") if prefix: prefix += "." @@ -442,6 +445,7 @@ def __dir__(self) -> list[str]: options = DictWrapper(_global_config) +options.__module__ = "pandas" # # Functions for use by pandas developers, in addition to User - api diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 039720017aa7b..ae0bbeb7afa4c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2944,7 +2944,9 @@ class _NoDefault(Enum): # Note: no_default is exported to the public API in pandas.api.extensions no_default = _NoDefault.no_default # Sentinel indicating the default value. 
+no_default.__module__ = "pandas.api.extensions" NoDefault = Literal[_NoDefault.no_default] +NoDefault.__module__ = "pandas.api.typing" @cython.boundscheck(False) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 5673b2d4cc393..a67c533d03e0e 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -546,3 +546,4 @@ class NAType(C_NAType): C_NA = NAType() # C-visible NA = C_NA # Python-visible +NA.__module__ = "pandas" diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 72a6967b6a313..a0265297fe873 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1871,6 +1871,7 @@ default 'raise' c_NaT = NaTType() # C-visible NaT = c_NaT # Python-visible +NaT.__module__ = "pandas" # ---------------------------------------------------------------------- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fb253e085cafc..26eed3d2fffee 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1807,7 +1807,7 @@ def take( .. code-block:: python def take(self, indices, allow_fill=False, fill_value=None): - from pandas.core.algorithms import take + from pandas.api.extensions import take # If the ExtensionArray is backed by an ndarray, then # just pass that here instead of coercing to object. 
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 447b7f90f2314..4691607a1d12a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -359,7 +359,7 @@ def is_datetime64tz_dtype(arr_or_dtype) -> bool: >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas.api.types import DatetimeTZDtype >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") >>> s = pd.Series([], dtype=dtype) >>> is_datetime64tz_dtype(dtype) @@ -407,7 +407,7 @@ def is_timedelta64_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_timedelta64_dtype + >>> from pandas.api.types import is_timedelta64_dtype >>> is_timedelta64_dtype(object) False >>> is_timedelta64_dtype(np.timedelta64) @@ -452,7 +452,7 @@ def is_period_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_period_dtype + >>> from pandas.api.types import is_period_dtype >>> is_period_dtype(object) False >>> is_period_dtype(pd.PeriodDtype(freq="D")) @@ -507,7 +507,7 @@ def is_interval_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_interval_dtype + >>> from pandas.api.types import is_interval_dtype >>> is_interval_dtype(object) False >>> is_interval_dtype(pd.IntervalDtype()) @@ -684,10 +684,10 @@ def is_dtype_equal(source, target) -> bool: True >>> is_dtype_equal(object, "category") False - >>> from pandas.core.dtypes.dtypes import CategoricalDtype + >>> from pandas.api.types import CategoricalDtype >>> is_dtype_equal(CategoricalDtype(), "category") True - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas.api.types import DatetimeTZDtype >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") False """ @@ -811,7 +811,7 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import 
is_signed_integer_dtype + >>> from pandas.api.types import is_signed_integer_dtype >>> is_signed_integer_dtype(str) False >>> is_signed_integer_dtype(int) @@ -1006,7 +1006,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool: Examples -------- >>> from pandas.api.types import is_datetime64_any_dtype - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas.api.types import DatetimeTZDtype >>> is_datetime64_any_dtype(str) False >>> is_datetime64_any_dtype(int) @@ -1066,7 +1066,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: Examples -------- >>> from pandas.api.types import is_datetime64_ns_dtype - >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype + >>> from pandas.api.types import DatetimeTZDtype >>> is_datetime64_ns_dtype(str) False >>> is_datetime64_ns_dtype(int) @@ -1121,7 +1121,7 @@ def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: Examples -------- - >>> from pandas.core.dtypes.common import is_timedelta64_ns_dtype + >>> from pandas.api.types import is_timedelta64_ns_dtype >>> is_timedelta64_ns_dtype(np.dtype("m8[ns]")) True >>> is_timedelta64_ns_dtype(np.dtype("m8[ps]")) # Wrong frequency diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2dbf04d8eb21a..3f9749f1f7a99 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -151,6 +151,7 @@ def __getitem__(self, arg): IndexSlice = _IndexSlice() +IndexSlice.__module__ = "pandas" class IndexingMixin: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 915c1f0d45572..3f3ab0b6dc5e7 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1331,7 +1331,7 @@ def sum(self, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - + In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. 
@@ -1391,7 +1391,7 @@ def mean(self, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - <class 'pandas.core.window.rolling.Window'> + <class 'pandas.api.typing.Window'> In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. @@ -1453,7 +1453,7 @@ def var(self, ddof: int = 1, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - <class 'pandas.core.window.rolling.Window'> + <class 'pandas.api.typing.Window'> In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. @@ -1508,7 +1508,7 @@ def std(self, ddof: int = 1, numeric_only: bool = False, **kwargs): to pass the parameter `win_type`. >>> type(ser.rolling(2, win_type="gaussian")) - <class 'pandas.core.window.rolling.Window'> + <class 'pandas.api.typing.Window'> In order to use the `SciPy` Gaussian window we need to provide the parameters `M` and `std`. The parameter `M` corresponds to 2 in our example. diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b46af93c447d4..2084088d6a823 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -971,6 +971,8 @@ class PlotAccessor(PandasObject): >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot") """ + __module__ = "pandas.plotting" + _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") _series_kinds = ("pie",) _dataframe_kinds = ("scatter", "hexbin") diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 0f2d824f37ffc..f5f62b168450d 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -766,3 +766,4 @@ def use(self, key, value) -> Generator[_Options]: plot_params = _Options() +plot_params.__module__ = "pandas.plotting" diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b41b0aa7ad839..318c602155904 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -571,39 +571,61 @@ def test_set_module(): assert 
api.typing.DataFrameGroupBy.__module__ == "pandas.api.typing" -def get_classes(module): - classes = [] +def get_objects(module_name, recurse): + module = importlib.import_module(module_name) + objs = [] for name, obj in inspect.getmembers(module): - if inspect.isclass(obj): - classes.append(obj) + if inspect.isfunction(obj) or type(obj).__name__ == "cython_function_or_method": + # Sphinx does not use the __module__ attribute for functions, + # so we do not need to overwrite the attribute. + continue + module_dunder = getattr(obj, "__module__", None) + if isinstance(module_dunder, str) and module_dunder.startswith("pandas"): + objs.append((module_name, name, obj)) + + if not recurse: + return objs paths = [str(pathlib.Path(module.__file__).parent)] for _, submodule_name, is_pkg in pkgutil.walk_packages( paths, module.__name__ + "." ): + tail = submodule_name[submodule_name.rfind(".") + 1 :] + if tail.startswith("_"): + continue + if submodule_name == "pandas.api.internals": + continue try: - submodule = importlib.import_module(submodule_name) - classes.extend(get_classes(submodule)) + objs.extend(get_objects(submodule_name, recurse)) except ImportError: pass - return classes + return objs @pytest.mark.slow -def test_module_attribute(): - # Check that each class pandas defines can be imported from - # the __module__ attribute - classes = get_classes(pd) - assert len(classes) > 100, len(classes) - failures = [] - for klass in classes: - if klass.__name__ == "ellipsis": - # This is a weird one. 
- continue - try: - m = importlib.import_module(klass.__module__) - getattr(m, klass.__name__) - except AttributeError: - failures.append(klass) +@pytest.mark.parametrize( + "module_name", + [ + "pandas", + "pandas.api", + "pandas.arrays", + "pandas.errors", + pytest.param("pandas.io", marks=pytest.mark.xfail(reason="Private imports")), + "pandas.plotting", + "pandas.testing", + ], +) +def test_attributes_module(module_name): + objs = get_objects(module_name, recurse=module_name != "pandas") + failures = [ + (module_name, name, type(obj), obj.__module__) + for module_name, name, obj in objs + if ( + obj.__module__ != module_name + and obj.__module__ != "pandas" + # Can't seem to change __module__ + and name != "Interval" + ) + ] assert len(failures) == 0, failures From dd75e66577bf627fcccdced3497fd6d8b8389aee Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 18 Oct 2025 10:02:39 -0400 Subject: [PATCH 05/10] Cleanups --- pandas/_config/config.py | 4 +- pandas/_libs/tslibs/period.pyx | 2 +- pandas/core/apply.py | 3 +- pandas/core/arrays/arrow/array.py | 3 +- pandas/core/arrays/base.py | 6 +- pandas/core/arrays/boolean.py | 3 +- pandas/core/arrays/categorical.py | 3 +- pandas/core/arrays/datetimes.py | 3 +- pandas/core/arrays/floating.py | 3 +- pandas/core/arrays/integer.py | 3 +- pandas/core/arrays/interval.py | 3 +- pandas/core/arrays/period.py | 3 +- pandas/core/arrays/sparse/array.py | 3 +- pandas/core/arrays/string_.py | 3 +- pandas/core/arrays/string_arrow.py | 3 +- pandas/core/arrays/timedeltas.py | 3 +- pandas/core/col.py | 3 +- pandas/core/dtypes/base.py | 3 +- pandas/core/flags.py | 3 +- pandas/core/groupby/grouper.py | 3 +- pandas/core/indexers/objects.py | 9 ++- pandas/core/indexes/frozen.py | 3 +- pandas/core/indexes/range.py | 3 +- pandas/core/interchange/dataframe_protocol.py | 3 +- pandas/core/resample.py | 15 +++-- pandas/core/window/ewm.py | 6 +- pandas/core/window/expanding.py | 6 +- pandas/core/window/rolling.py | 6 +- 
pandas/io/json/_json.py | 3 +- pandas/io/sas/sasreader.py | 3 +- pandas/tests/api/test_api.py | 59 ++++++++++++------- 31 files changed, 118 insertions(+), 61 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 62cfd9968412f..210c57ef79698 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -101,7 +101,6 @@ class RegisteredOption(NamedTuple): class OptionError(AttributeError, KeyError): - __module__ = "pandas.errors" """ Exception raised for pandas.options. @@ -118,6 +117,8 @@ class OptionError(AttributeError, KeyError): OptionError: No such option """ + __module__ = "pandas.errors" + # # User API @@ -413,6 +414,7 @@ def __init__(self, d: dict[str, Any], prefix: str = "") -> None: def __setattr__(self, key: str, val: Any) -> None: if key == "__module__": + # Need to be able to set __module__ to pandas for pandas.options super().__setattr__(key, val) return prefix = object.__getattribute__(self, "prefix") diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index d415bb49e491f..facf430060e73 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1627,11 +1627,11 @@ DIFFERENT_FREQ = ("Input has different freq={other_freq} " class IncompatibleFrequency(TypeError): - __module__ = "pandas.errors" """ Raised when trying to compare or operate between Periods with different frequencies. """ + __module__ = "pandas.errors" pass diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 778c58e8e6289..468f24a07cb4a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -76,7 +76,6 @@ class BaseExecutionEngine(abc.ABC): - __module__ = "pandas.api.executors" """ Base class for execution engines for map and apply methods. @@ -89,6 +88,8 @@ class BaseExecutionEngine(abc.ABC): simply runs the code with the Python interpreter and pandas. 
""" + __module__ = "pandas.api.executors" + @staticmethod @abc.abstractmethod def map( diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 601ec0a91b413..08f1ffdcc6a0d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -246,7 +246,6 @@ class ArrowExtensionArray( ExtensionArraySupportsAnyAll, ArrowStringArrayMixin, ): - __module__ = "pandas.arrays" """ Pandas ExtensionArray backed by a PyArrow ChunkedArray. @@ -297,6 +296,8 @@ class ArrowExtensionArray( Length: 3, dtype: int64[pyarrow] """ # noqa: E501 (http link too long) + __module__ = "pandas.arrays" + _pa_array: pa.ChunkedArray _dtype: ArrowDtype diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 26eed3d2fffee..e091ecf18668d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -106,7 +106,6 @@ class ExtensionArray: - __module__ = "pandas.api.extensions" """ Abstract base class for custom 1-D array types. @@ -257,6 +256,8 @@ class ExtensionArray: https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/list/array.py """ + __module__ = "pandas.api.extensions" + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. # Don't override this. _typ = "extension" @@ -2788,7 +2789,6 @@ def _add_logical_ops(cls) -> None: class ExtensionScalarOpsMixin(ExtensionOpsMixin): - __module__ = "pandas.api.extensions" """ A mixin for defining ops on an ExtensionArray. @@ -2814,6 +2814,8 @@ class ExtensionScalarOpsMixin(ExtensionOpsMixin): with NumPy arrays. 
""" + __module__ = "pandas.api.extensions" + @classmethod def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None): """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index b993839d6605d..aa28e846413f0 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -246,7 +246,6 @@ def coerce_to_array( class BooleanArray(BaseMaskedArray): - __module__ = "pandas.arrays" """ Array of boolean (True/False) data with missing values. @@ -305,6 +304,8 @@ class BooleanArray(BaseMaskedArray): Length: 3, dtype: boolean """ + __module__ = "pandas.arrays" + _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c7cf1934d08b7..b570cc90e4948 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -246,7 +246,6 @@ def contains(cat, key, container) -> bool: class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin): - __module__ = "pandas" """ Represent a categorical variable in classic R / S-plus fashion. @@ -362,6 +361,8 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi 'c' """ + __module__ = "pandas" + # For comparisons, so that numpy uses our implementation if the compare # ops, which raise __array_priority__ = 1000 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cc6c94ff1493f..4cf5f4b13890e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -172,7 +172,6 @@ def f(self): class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): - __module__ = "pandas.arrays" """ Pandas ExtensionArray for tz-naive or tz-aware datetime data. 
@@ -224,6 +223,8 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): Length: 2, dtype: datetime64[s] """ + __module__ = "pandas.arrays" + _typ = "datetimearray" _internal_fill_value = np.datetime64("NaT", "ns") _recognized_scalars = (datetime, np.datetime64) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 810a0e6cff1d9..e547c3fe76089 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -64,7 +64,6 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr class FloatingArray(NumericArray): - __module__ = "pandas.arrays" """ Array of floating (optional missing) values. @@ -130,6 +129,8 @@ class FloatingArray(NumericArray): Length: 3, dtype: Float32 """ + __module__ = "pandas.arrays" + _dtype_cls = FloatingDtype diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e89b67e608d0b..7a8ca85a83db5 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -72,7 +72,6 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr class IntegerArray(NumericArray): - __module__ = "pandas.arrays" """ Array of integer (optional missing) values. @@ -143,6 +142,8 @@ class IntegerArray(NumericArray): Length: 3, dtype: UInt16 """ + __module__ = "pandas.arrays" + _dtype_cls = IntegerDtype diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f4ee018665bfc..b0472c70557e5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -177,7 +177,6 @@ class IntervalArray(IntervalMixin, ExtensionArray): - __module__ = "pandas.arrays" """ Pandas array for interval data that are closed on the same side. @@ -244,6 +243,8 @@ class IntervalArray(IntervalMixin, ExtensionArray): :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. 
""" + __module__ = "pandas.arrays" + can_hold_na = True _na_value = _fill_value = np.nan diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 2f50d72ce95ad..18e4ff31164ac 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -123,7 +123,6 @@ def f(self): # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] - __module__ = "pandas.arrays" """ Pandas ExtensionArray for storing Period data. @@ -178,6 +177,8 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] Length: 2, dtype: period[D] """ + __module__ = "pandas.arrays" + # array priority higher than numpy scalars __array_priority__ = 1000 _typ = "periodarray" # ABCPeriodArray diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 98afa7f183f4b..396a8b67c2d2d 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -300,7 +300,6 @@ def _wrap_result( class SparseArray(OpsMixin, PandasObject, ExtensionArray): - __module__ = "pandas.arrays" """ An ExtensionArray for storing sparse data. 
@@ -381,6 +380,8 @@ class SparseArray(OpsMixin, PandasObject, ExtensionArray): Indices: array([2, 3], dtype=int32) """ + __module__ = "pandas.arrays" + _subtyp = "sparse_array" # register ABCSparseArray _hidden_attrs = PandasObject._hidden_attrs | frozenset([]) _sparse_index: SparseIndex diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index a6fe8fd2cadb8..ec591d7711fa9 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -551,7 +551,6 @@ def view(self, dtype: Dtype | None = None) -> Self: # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] - __module__ = "pandas.arrays" """ Extension array for string data. @@ -634,6 +633,8 @@ class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] Length: 3, dtype: boolean """ + __module__ = "pandas.arrays" + # undo the NumpyExtensionArray hack _typ = "extension" diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7ff7905bfa5a6..489556536ca31 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -82,7 +82,6 @@ def _is_string_view(typ): class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringArray): - __module__ = "pandas.arrays" """ Extension array for string data in a ``pyarrow.ChunkedArray``. 
@@ -126,6 +125,8 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr Length: 4, dtype: string """ + __module__ = "pandas.arrays" + # error: Incompatible types in assignment (expression has type "StringDtype", # base class "ArrowExtensionArray" defined the type as "ArrowDtype") _dtype: StringDtype # type: ignore[assignment] diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index fecadf74fe0ba..9b3452c318f9c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -104,7 +104,6 @@ def f(self) -> np.ndarray: class TimedeltaArray(dtl.TimelikeOps): - __module__ = "pandas.arrays" """ Pandas ExtensionArray for timedelta data. @@ -148,6 +147,8 @@ class TimedeltaArray(dtl.TimelikeOps): Length: 2, dtype: timedelta64[ns] """ + __module__ = "pandas.arrays" + _typ = "timedeltaarray" _internal_fill_value = np.timedelta64("NaT", "ns") _recognized_scalars = (timedelta, np.timedelta64, Tick) diff --git a/pandas/core/col.py b/pandas/core/col.py index 194dbf2cbfd57..0182188dd0317 100644 --- a/pandas/core/col.py +++ b/pandas/core/col.py @@ -70,13 +70,14 @@ def _pretty_print_args_kwargs(*args: Any, **kwargs: Any) -> str: class Expression: - __module__ = "pandas.api.typing" """ Class representing a deferred column. This is not meant to be instantiated directly. Instead, use :meth:`pandas.col`. """ + __module__ = "pandas.api.typing" + def __init__(self, func: Callable[[DataFrame], Any], repr_str: str) -> None: self._func = func self._repr_str = repr_str diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 22ab6fa0b9d9d..709f96125da39 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -42,7 +42,6 @@ class ExtensionDtype: - __module__ = "pandas.api.extensions" """ A custom data type, to be paired with an ExtensionArray. @@ -112,6 +111,8 @@ class property**. provided for registering virtual subclasses. 
""" + __module__ = "pandas.api.extensions" + _metadata: tuple[str, ...] = () def __str__(self) -> str: diff --git a/pandas/core/flags.py b/pandas/core/flags.py index 52436eed55a15..a98380e9f7d16 100644 --- a/pandas/core/flags.py +++ b/pandas/core/flags.py @@ -8,7 +8,6 @@ class Flags: - __module__ = "pandas" """ Flags that apply to pandas objects. @@ -56,6 +55,8 @@ class Flags: """ + __module__ = "pandas" + _keys: set[str] = {"allows_duplicate_labels"} def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index b2200469a8dec..a45ce1f385e4d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -64,7 +64,6 @@ class Grouper: - __module__ = "pandas" """ A Grouper allows the user to specify a groupby instruction for an object. @@ -254,6 +253,8 @@ class Grouper: Freq: 17min, dtype: int64 """ + __module__ = "pandas" + sort: bool dropna: bool _grouper: Index | None diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 3e6d32637cfa5..2c2413c74f2fa 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -17,7 +17,6 @@ class BaseIndexer: - __module__ = "pandas.api.indexers" """ Base class for window bounds calculations. @@ -59,6 +58,8 @@ class BaseIndexer: 4 4.0 """ + __module__ = "pandas.api.indexers" + def __init__( self, index_array: np.ndarray | None = None, window_size: int = 0, **kwargs ) -> None: @@ -212,7 +213,6 @@ def get_window_bounds( class VariableOffsetWindowIndexer(BaseIndexer): - __module__ = "pandas.api.indexers" """ Calculate window boundaries based on a non-fixed offset such as a BusinessDay. 
@@ -273,6 +273,8 @@ class VariableOffsetWindowIndexer(BaseIndexer): 2020-01-10 9.0 """ + __module__ = "pandas.api.indexers" + def __init__( self, index_array: np.ndarray | None = None, @@ -436,7 +438,6 @@ def get_window_bounds( class FixedForwardWindowIndexer(BaseIndexer): - __module__ = "pandas.api.indexers" """ Creates window boundaries for fixed-length windows that include the current row. @@ -481,6 +482,8 @@ class FixedForwardWindowIndexer(BaseIndexer): 4 4.0 """ + __module__ = "pandas.api.indexers" + def get_window_bounds( self, num_values: int = 0, diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index bb6acd18ca014..fe06e235e0fd9 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -20,13 +20,14 @@ class FrozenList(PandasObject, list): - __module__ = "pandas.api.typing" """ Container that doesn't allow setting item *but* because it's technically hashable, will be used for lookups, appropriately, etc. """ + __module__ = "pandas.api.typing" + # Side note: This has to be of type list. Otherwise, # it messes up PyTables type checks. diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 81c4871bf9862..a817cae51be5c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -77,7 +77,6 @@ def min_fitting_element(start: int, step: int, lower_limit: int) -> int: @set_module("pandas") class RangeIndex(Index): - __module__ = "pandas" """ Immutable Index implementing a monotonic integer range. 
@@ -138,6 +137,8 @@ class RangeIndex(Index): [] """ + __module__ = "pandas" + _typ = "rangeindex" _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") _range: range diff --git a/pandas/core/interchange/dataframe_protocol.py b/pandas/core/interchange/dataframe_protocol.py index a3c8cf7f75fe6..15bd323d5fade 100644 --- a/pandas/core/interchange/dataframe_protocol.py +++ b/pandas/core/interchange/dataframe_protocol.py @@ -363,7 +363,6 @@ def get_buffers(self) -> ColumnBuffers: class DataFrame(ABC): - __module__ = "pandas.api.interchange" """ A data frame class, with only the methods required by the interchange protocol defined. @@ -378,6 +377,8 @@ class DataFrame(ABC): to the dataframe interchange protocol specification. """ + __module__ = "pandas.api.interchange" + version = 0 # version of the protocol @abstractmethod diff --git a/pandas/core/resample.py b/pandas/core/resample.py index c63586c942f35..3a4ce952ffdcf 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -113,7 +113,6 @@ class Resampler(BaseGroupBy, PandasObject): - __module__ = "pandas.api.typing" """ Class for resampling datetimelike data, a groupby-like operation. See aggregate, transform, and apply functions on this object. @@ -134,6 +133,8 @@ class Resampler(BaseGroupBy, PandasObject): After resampling, see aggregate, apply, and transform functions. 
""" + __module__ = "pandas.api.typing" + _grouper: BinGrouper _timegrouper: TimeGrouper binner: DatetimeIndex | TimedeltaIndex | PeriodIndex # depends on subclass @@ -2170,11 +2171,12 @@ def _wrap_result(self, result): class DatetimeIndexResamplerGroupby( # type: ignore[misc] _GroupByMixin, DatetimeIndexResampler ): - __module__ = "pandas.api.typing" """ Provides a resample of a groupby implementation """ + __module__ = "pandas.api.typing" + @property def _resampler_cls(self): return DatetimeIndexResampler @@ -2272,11 +2274,12 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): class PeriodIndexResamplerGroupby( # type: ignore[misc] _GroupByMixin, PeriodIndexResampler ): - __module__ = "pandas.api.typing" """ Provides a resample of a groupby implementation. """ + __module__ = "pandas.api.typing" + @property def _resampler_cls(self): return PeriodIndexResampler @@ -2309,11 +2312,12 @@ def _adjust_binner_for_upsample(self, binner): class TimedeltaIndexResamplerGroupby( # type: ignore[misc] _GroupByMixin, TimedeltaIndexResampler ): - __module__ = "pandas.api.typing" """ Provides a resample of a groupby implementation. """ + __module__ = "pandas.api.typing" + @property def _resampler_cls(self): return TimedeltaIndexResampler @@ -2349,7 +2353,6 @@ def get_resampler_for_grouping( class TimeGrouper(Grouper): - __module__ = "pandas.api.typing" """ Custom groupby class for time-interval grouping. @@ -2362,6 +2365,8 @@ class TimeGrouper(Grouper): If axis is PeriodIndex """ + __module__ = "pandas.api.typing" + _attributes = Grouper._attributes + ( "closed", "label", diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 9e57738d620c7..1ea05e24d0db5 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -130,7 +130,6 @@ def _calculate_deltas( class ExponentialMovingWindow(BaseWindow): - __module__ = "pandas.api.typing" r""" Provide exponentially weighted (EW) calculations. 
@@ -317,6 +316,8 @@ class ExponentialMovingWindow(BaseWindow): 4 3.233686 """ + __module__ = "pandas.api.typing" + _attributes = [ "com", "span", @@ -904,11 +905,12 @@ def _cov(X, Y): class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow): - __module__ = "pandas.api.typing" """ Provide an exponential moving window groupby implementation. """ + __module__ = "pandas.api.typing" + _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes def __init__(self, obj, *args, _grouper=None, **kwargs) -> None: diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index b194ef9c674de..567ede27949d5 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -38,7 +38,6 @@ class Expanding(RollingAndExpandingMixin): - __module__ = "pandas.api.typing" """ Provide expanding window calculations. @@ -107,6 +106,8 @@ class Expanding(RollingAndExpandingMixin): 4 7.0 """ + __module__ = "pandas.api.typing" + _attributes: list[str] = ["min_periods", "method"] def __init__( @@ -1452,11 +1453,12 @@ def corr( class ExpandingGroupby(BaseWindowGroupby, Expanding): - __module__ = "pandas.api.typing" """ Provide a expanding groupby implementation. """ + __module__ = "pandas.api.typing" + _attributes = Expanding._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3f3ab0b6dc5e7..e6f84941f6b1a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -856,7 +856,6 @@ def _gotitem(self, key, ndim, subset=None): class Window(BaseWindow): - __module__ = "pandas.api.typing" """ Provide rolling window calculations. 
@@ -1112,6 +1111,8 @@ class Window(BaseWindow): 2020-01-03 2020-01-02 6.0 """ + __module__ = "pandas.api.typing" + _attributes = [ "window", "min_periods", @@ -3532,11 +3533,12 @@ def corr( class RollingGroupby(BaseWindowGroupby, Rolling): - __module__ = "pandas.api.typing" """ Provide a rolling groupby implementation. """ + __module__ = "pandas.api.typing" + _attributes = Rolling._attributes + BaseWindowGroupby._attributes def _get_window_indexer(self) -> GroupbyIndexer: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index d258767fc2e70..bfa61253c9c1f 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -821,7 +821,6 @@ def read_json( class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): - __module__ = "pandas.api.typing" """ JsonReader provides an interface for reading in a JSON file. @@ -830,6 +829,8 @@ class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]): whole document. """ + __module__ = "pandas.api.typing" + def __init__( self, filepath_or_buffer, diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 1daff7b3d0c7b..936cc4868daf2 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -38,11 +38,12 @@ class SASReader(Iterator["DataFrame"], ABC): - __module__ = "pandas.api.typing" """ Abstract class for XportReader and SAS7BDATReader. """ + __module__ = "pandas.api.typing" + @abstractmethod def read(self, nrows: int | None = None) -> DataFrame: ... diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 318c602155904..4687e242cb4d6 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -571,14 +571,33 @@ def test_set_module(): assert api.typing.DataFrameGroupBy.__module__ == "pandas.api.typing" -def get_objects(module_name, recurse): +def get_pandas_objects( + module_name: str, recurse: bool +) -> list[tuple[str, str, object]]: + """ + Get all pandas objects within a module. 
+ + An object is determined to be part of pandas if it has a string + __module__ attribute that starts with ``"pandas"``. + + Parameters + ---------- + module_name : str + Name of the module to search. + recurse : bool + Whether to search submodules. + + Returns + ------- + List of all objects that are determined to be a part of pandas. + """ module = importlib.import_module(module_name) objs = [] for name, obj in inspect.getmembers(module): if inspect.isfunction(obj) or type(obj).__name__ == "cython_function_or_method": - # Sphinx does not use the __module__ attribute for functions, - # so we do not need to overwrite the attribute. + # We have not set __module__ on public functions; may do + # so in the future. continue module_dunder = getattr(obj, "__module__", None) if isinstance(module_dunder, str) and module_dunder.startswith("pandas"): @@ -587,19 +606,14 @@ def get_objects(module_name, recurse): if not recurse: return objs - paths = [str(pathlib.Path(module.__file__).parent)] - for _, submodule_name, is_pkg in pkgutil.walk_packages( - paths, module.__name__ + "." 
- ): - tail = submodule_name[submodule_name.rfind(".") + 1 :] - if tail.startswith("_"): + paths = [pathlib.Path(module.__file__).parent] + for module_info in pkgutil.walk_packages(paths): + name = module_info.name + if name.startswith("_") or name == "internals": continue - if submodule_name == "pandas.api.internals": - continue - try: - objs.extend(get_objects(submodule_name, recurse)) - except ImportError: - pass + objs.extend( + get_pandas_objects(f"{module.__name__}.{name}", recurse=module_info.ispkg) + ) return objs @@ -617,15 +631,18 @@ def get_objects(module_name, recurse): ], ) def test_attributes_module(module_name): - objs = get_objects(module_name, recurse=module_name != "pandas") + recurse = module_name not in ["pandas", "pandas.testing"] + objs = get_pandas_objects(module_name, recurse=recurse) failures = [ (module_name, name, type(obj), obj.__module__) for module_name, name, obj in objs - if ( - obj.__module__ != module_name - and obj.__module__ != "pandas" - # Can't seem to change __module__ - and name != "Interval" + if not ( + obj.__module__ == module_name + # Explicit exceptions + or ("Dtype" in name and obj.__module__ == "pandas") + or (name == "Categorical" and obj.__module__ == "pandas") + # TODO: Can't seem to change __module__ + or name == "Interval" ) ] assert len(failures) == 0, failures From 0dfe2cb1209dd0373078d350c8b56947e3e23846 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 18 Oct 2025 10:38:10 -0400 Subject: [PATCH 06/10] Fixes --- pandas/_typing.py | 12 ++++++------ pandas/tests/api/test_api.py | 2 ++ pandas/tests/series/methods/test_astype.py | 5 +---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index f23dc529f2f83..23598bd2bc517 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -107,7 +107,7 @@ class SequenceNotStr(Protocol[_T_co]): - __module__ = "pandas.api.typing.aliases" + __module__: str = "pandas.api.typing.aliases" @overload def __getitem__(self, 
index: SupportsIndex, /) -> _T_co: ... @@ -280,7 +280,7 @@ def tell(self) -> int: class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]): - __module__ = "pandas.api.typing.aliases" + __module__: str = "pandas.api.typing.aliases" def read(self, n: int = ..., /) -> AnyStr_co: # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File @@ -288,7 +288,7 @@ def read(self, n: int = ..., /) -> AnyStr_co: class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]): - __module__ = "pandas.api.typing.aliases" + __module__: str = "pandas.api.typing.aliases" def write(self, b: AnyStr_contra, /) -> Any: # for gzip.GzipFile, bz2.BZ2File @@ -300,19 +300,19 @@ def flush(self) -> Any: class ReadPickleBuffer(ReadBuffer[bytes], Protocol): - __module__ = "pandas.api.typing.aliases" + __module__: str = "pandas.api.typing.aliases" def readline(self) -> bytes: ... class WriteExcelBuffer(WriteBuffer[bytes], Protocol): - __module__ = "pandas.api.typing.aliases" + __module__: str = "pandas.api.typing.aliases" def truncate(self, size: int | None = ..., /) -> int: ... 
class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol): - __module__ = "pandas.api.typing.aliases" + __module__: str = "pandas.api.typing.aliases" def __iter__(self) -> Iterator[AnyStr_co]: # for engine=python diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 4687e242cb4d6..a520f68e2009b 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -606,6 +606,8 @@ def get_pandas_objects( if not recurse: return objs + # __file__ can, but shouldn't, be None + assert isinstance(module.__file__, str) paths = [pathlib.Path(module.__file__).parent] for module_info in pkgutil.walk_packages(paths): name = module_info.name diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index aa38e63c826f6..81648377942f7 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -579,10 +579,7 @@ def test_astype_categorical_invalid_conversions(self): ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values() ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) - msg = ( - "dtype '' " - "not understood" - ) + msg = "dtype '' not understood" with pytest.raises(TypeError, match=msg): ser.astype(Categorical) with pytest.raises(TypeError, match=msg): From 6ceda9c943fc65f39ffb1146ed636b8f1bcb630f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 18 Oct 2025 11:07:42 -0400 Subject: [PATCH 07/10] Fixup --- ci/code_checks.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 68ca06564d3a6..3b1af77e5f70e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -73,10 +73,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Period.freq GL08" \ -i "pandas.Period.ordinal GL08" \ -i "pandas.errors.IncompatibleFrequency SA01,SS06,EX01" \ + -i "pandas.errors.InvalidVersion GL08" \ -i "pandas.api.extensions.ExtensionArray.value_counts 
EX01,RT03,SA01" \ - -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \ - -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \ - -i "pandas.core.resample.Resampler.quantile PR01,PR07" \ + -i "pandas.api.typing.DataFrameGroupBy.plot PR02" \ + -i "pandas.api.typing.SeriesGroupBy.plot PR02" \ + -i "pandas.api.typing.Resampler.quantile PR01,PR07" \ + -i "pandas.arrays.NumpyExtensionArray GL08" \ -i "pandas.tseries.offsets.BDay PR02,SA01" \ -i "pandas.tseries.offsets.BHalfYearBegin.is_on_offset GL08" \ -i "pandas.tseries.offsets.BHalfYearBegin.n GL08" \ From b0930913834efd0d470554af0d7a29a473c84607 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 18 Oct 2025 16:40:49 -0400 Subject: [PATCH 08/10] Refinements --- pandas/_config/config.py | 7 ++----- pandas/_libs/interval.pyx | 1 + pandas/core/dtypes/common.py | 2 +- pandas/tests/api/test_api.py | 3 ++- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 210c57ef79698..35949d7683abc 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -413,10 +413,6 @@ def __init__(self, d: dict[str, Any], prefix: str = "") -> None: object.__setattr__(self, "prefix", prefix) def __setattr__(self, key: str, val: Any) -> None: - if key == "__module__": - # Need to be able to set __module__ to pandas for pandas.options - super().__setattr__(key, val) - return prefix = object.__getattribute__(self, "prefix") if prefix: prefix += "." 
@@ -447,7 +443,8 @@ def __dir__(self) -> list[str]: options = DictWrapper(_global_config) -options.__module__ = "pandas" +# DictWrapper defines a custom setattr +object.__setattr__(options, "__module__", "pandas") # # Functions for use by pandas developers, in addition to User - api diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 5d0876591a151..ad32adf5d19f9 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -382,6 +382,7 @@ cdef class Interval(IntervalMixin): >>> year_2017.length Timedelta('365 days 00:00:00') """ + __module__ = "pandas" _typ = "interval" __array_priority__ = 1000 diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4691607a1d12a..fc7cc59ecfb6a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -359,7 +359,7 @@ def is_datetime64tz_dtype(arr_or_dtype) -> bool: >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True - >>> from pandas.api.types import DatetimeTZDtype + >>> from pandas import DatetimeTZDtype >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") >>> s = pd.Series([], dtype=dtype) >>> is_datetime64tz_dtype(dtype) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index a520f68e2009b..4b818ba08020f 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -643,7 +643,8 @@ def test_attributes_module(module_name): # Explicit exceptions or ("Dtype" in name and obj.__module__ == "pandas") or (name == "Categorical" and obj.__module__ == "pandas") - # TODO: Can't seem to change __module__ + # Setting __module__ on a cdef class has no effect + # https://github.com/cython/cython/issues/7231 or name == "Interval" ) ] From 138bb421a00c410c6f0cb124c158dd982ca70331 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 18 Oct 2025 17:39:22 -0400 Subject: [PATCH 09/10] Resolve Interval --- meson.build | 3 +++ pandas/_libs/interval.pyx | 1 + pandas/tests/api/test_api.py | 3 --- 
setup.py | 5 ++++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/meson.build b/meson.build index 6a00e52481108..156dbb6c63e7c 100644 --- a/meson.build +++ b/meson.build @@ -18,6 +18,9 @@ versioneer = files('generate_version.py') add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language: 'c') add_project_arguments('-DNPY_NO_DEPRECATED_API=0', language: 'cpp') +# Enables setting __module__ on cdef classes +# https://github.com/cython/cython/issues/7231 +add_project_arguments('-DCYTHON_USE_TYPE_SPECS=1', language: 'c') # Allow supporting older numpys than the version compiled against # Set the define to the min supported version of numpy for pandas diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index ad32adf5d19f9..a1cd4c9d15447 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -445,6 +445,7 @@ cdef class Interval(IntervalMixin): >>> interval.closed 'left' """ + __module__ = "pandas" def __init__(self, left, right, str closed="right"): # note: it is faster to just do these checks than to use a special diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 4b818ba08020f..6cf182b65cdb9 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -643,9 +643,6 @@ def test_attributes_module(module_name): # Explicit exceptions or ("Dtype" in name and obj.__module__ == "pandas") or (name == "Categorical" and obj.__module__ == "pandas") - # Setting __module__ on a cdef class has no effect - # https://github.com/cython/cython/issues/7231 - or name == "Interval" ) ] assert len(failures) == 0, failures diff --git a/setup.py b/setup.py index db1852b43cfa9..a2d7e3f98617c 100755 --- a/setup.py +++ b/setup.py @@ -321,7 +321,7 @@ def run(self) -> None: endian_macro = [("__LITTLE_ENDIAN__", "1")] -extra_compile_args = [] +extra_compile_args = ["-DCYTHON_USE_TYPE_SPECS=1"] extra_link_args = [] if is_platform_windows(): if debugging_symbols_requested: @@ -569,6 +569,9 @@ def
srcpath(name=None, suffix=".pyx", subdir="src"): extra_compile_args.append("-qlanglvl=extended0x:nolibext") undef_macros.append("_POSIX_THREADS") + print("-" * 80) + print(extra_compile_args) + print("-" * 80) obj = Extension( f"pandas.{name}", sources=sources, From 0be1e8061e707eb4e753c5f2962b44fae5cf8cd6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 18 Oct 2025 17:46:09 -0400 Subject: [PATCH 10/10] Cleanup --- setup.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/setup.py b/setup.py index a2d7e3f98617c..db1852b43cfa9 100755 --- a/setup.py +++ b/setup.py @@ -321,7 +321,7 @@ def run(self) -> None: endian_macro = [("__LITTLE_ENDIAN__", "1")] -extra_compile_args = ["-DCYTHON_USE_TYPE_SPECS=1"] +extra_compile_args = [] extra_link_args = [] if is_platform_windows(): if debugging_symbols_requested: @@ -569,9 +569,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): extra_compile_args.append("-qlanglvl=extended0x:nolibext") undef_macros.append("_POSIX_THREADS") - print("-" * 80) - print(extra_compile_args) - print("-" * 80) obj = Extension( f"pandas.{name}", sources=sources,