From 1f93779575610596dba79789f750ef37ce46802e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Jan 2022 17:16:12 -0600 Subject: [PATCH 01/28] ENH: add NDArrayBackedExtensionArray to public API --- pandas/api/extensions/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index ea5f1ba926899..7b8444ba91876 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -19,6 +19,7 @@ ExtensionArray, ExtensionScalarOpsMixin, ) +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray __all__ = [ "no_default", @@ -30,4 +31,5 @@ "take", "ExtensionArray", "ExtensionScalarOpsMixin", + "NDArrayBackedExtensionArray", ] From 522b548bdd2204f2ef0f0f2db412a67bf64a254c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Jan 2022 17:17:26 -0600 Subject: [PATCH 02/28] add whatsnew --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c688ced673514..5d8c32992ec87 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -34,7 +34,7 @@ Other enhancements - :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`) - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) - :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) -- +- :class:`NDArrayBackedExtensionArray` now exposed in the public API. (:issue:`45544`) .. --------------------------------------------------------------------------- .. 
_whatsnew_150.notable_bug_fixes: From 945f8404b124f9966b797d9b67d73b7bb01944d2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 10:09:04 -0600 Subject: [PATCH 03/28] add NDArrayBackedExtensionArray to pandas.core.arrays.__init__ --- pandas/api/extensions/__init__.py | 2 +- pandas/core/arrays/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 7b8444ba91876..2c3fee93fffe0 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -18,8 +18,8 @@ from pandas.core.arrays import ( ExtensionArray, ExtensionScalarOpsMixin, + NDArrayBackedExtensionArray, ) -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray __all__ = [ "no_default", diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index e301e82a0ee75..5ce1480d5506e 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,3 +1,4 @@ +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ( ExtensionArray, ExtensionOpsMixin, @@ -32,6 +33,7 @@ "FloatingArray", "IntegerArray", "IntervalArray", + "NDArrayBackedExtensionArray", "PandasArray", "PeriodArray", "period_array", From 721ae110e3a39bfa0f8ce065b6b914fbe9e7e734 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 10:22:40 -0600 Subject: [PATCH 04/28] add tests for extensions api --- pandas/tests/api/test_api.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2e306c76d246c..8769cc7cc91da 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -8,6 +8,7 @@ import pandas as pd from pandas import api import pandas._testing as tm +from pandas.api import extensions class Base: @@ -280,6 +281,33 @@ def test_api(self): self.check(api, self.allowed) +class TestExtensions(Base): + # 
top-level classes + classes = [ + "ExtensionDtype", + "ExtensionArray", + "ExtensionScalarOpsMixin", + "NDArrayBackedExtensionArray", + ] + + # top-level functions + funcs = [ + "register_extension_dtype", + "register_dataframe_accessor", + "register_index_accessor", + "register_series_accessor", + "take", + ] + + # misc + misc = ["no_default"] + + def test_api(self): + checkthese = self.classes + self.funcs + self.misc + + self.check(namespace=extensions, expected=checkthese) + + class TestTesting(Base): funcs = [ "assert_frame_equal", From ae68f9ddd8797d20019c6854ecda24159761d901 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 11:07:05 -0600 Subject: [PATCH 05/28] add docs --- doc/source/development/extending.rst | 8 ++++++++ doc/source/reference/extensions.rst | 1 + 2 files changed, 9 insertions(+) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 5347aab2c731a..20bd1604afde7 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -134,6 +134,14 @@ by some other storage type, like Python lists. See the `extension array source`_ for the interface definition. The docstrings and comments contain guidance for properly implementing the interface. +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For ExtensionArrays backed by a single NumPy array, the +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you +some effort. It contains a private property ``_ndarray`` with the backing NumPy +array and implements the extension array interface. + .. _extending.extension.operator: :class:`~pandas.api.extensions.ExtensionArray` operator support diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index ce8d8d5c2ca10..cfe7878dbc977 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -24,6 +24,7 @@ objects. 
:template: autosummary/class_without_autosummary.rst api.extensions.ExtensionArray + api.extensions.NDArrayBackedExtensionArray arrays.PandasArray .. We need this autosummary so that methods and attributes are generated. From 38113c818f3a0d45c456515ea8fc1b003228b600 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 14:29:37 -0600 Subject: [PATCH 06/28] add autosummary for methods and attributes --- doc/source/reference/extensions.rst | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index cfe7878dbc977..f7a984ce50848 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -63,6 +63,38 @@ objects. api.extensions.ExtensionArray.ndim api.extensions.ExtensionArray.shape api.extensions.ExtensionArray.tolist + api.extensions.NDArrayBackedExtensionArray.dtype + api.extensions.NDArrayBackedExtensionArray.T + api.extensions.NDArrayBackedExtensionArray.nbytes + api.extensions.NDArrayBackedExtensionArray.ndim + api.extensions.NDArrayBackedExtensionArray.shape + api.extensions.NDArrayBackedExtensionArray.size + api.extensions.NDArrayBackedExtensionArray.argmax + api.extensions.NDArrayBackedExtensionArray.argmin + api.extensions.NDArrayBackedExtensionArray.argsort + api.extensions.NDArrayBackedExtensionArray.astype + api.extensions.NDArrayBackedExtensionArray.dropna + api.extensions.NDArrayBackedExtensionArray.equals + api.extensions.NDArrayBackedExtensionArray.factorize + api.extensions.NDArrayBackedExtensionArray.fillna + api.extensions.NDArrayBackedExtensionArray.insert + api.extensions.NDArrayBackedExtensionArray.isin + api.extensions.NDArrayBackedExtensionArray.isna + api.extensions.NDArrayBackedExtensionArray.searchsorted + api.extensions.NDArrayBackedExtensionArray.shift + api.extensions.NDArrayBackedExtensionArray.take + api.extensions.NDArrayBackedExtensionArray.to_numpy + 
api.extensions.NDArrayBackedExtensionArray.tolist + api.extensions.NDArrayBackedExtensionArray.unique + api.extensions.NDArrayBackedExtensionArray.value_counts + api.extensions.NDArrayBackedExtensionArray.view + api.extensions.NDArrayBackedExtensionArray.copy + api.extensions.NDArrayBackedExtensionArray.delete + api.extensions.NDArrayBackedExtensionArray.ravel + api.extensions.NDArrayBackedExtensionArray.repeat + api.extensions.NDArrayBackedExtensionArray.reshape + api.extensions.NDArrayBackedExtensionArray.swapaxes + api.extensions.NDArrayBackedExtensionArray.transpose Additionally, we have some utility methods for ensuring your object behaves correctly. From 18ec784440632a8950262f5bad39ce5298f8b2fd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 15:55:11 -0600 Subject: [PATCH 07/28] remove unreferenced methods from docs --- doc/source/reference/extensions.rst | 12 ------------ pandas/core/arrays/_mixins.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index f7a984ce50848..8bad4e9a5e85a 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -64,11 +64,6 @@ objects. api.extensions.ExtensionArray.shape api.extensions.ExtensionArray.tolist api.extensions.NDArrayBackedExtensionArray.dtype - api.extensions.NDArrayBackedExtensionArray.T - api.extensions.NDArrayBackedExtensionArray.nbytes - api.extensions.NDArrayBackedExtensionArray.ndim - api.extensions.NDArrayBackedExtensionArray.shape - api.extensions.NDArrayBackedExtensionArray.size api.extensions.NDArrayBackedExtensionArray.argmax api.extensions.NDArrayBackedExtensionArray.argmin api.extensions.NDArrayBackedExtensionArray.argsort @@ -88,13 +83,6 @@ objects. 
api.extensions.NDArrayBackedExtensionArray.unique api.extensions.NDArrayBackedExtensionArray.value_counts api.extensions.NDArrayBackedExtensionArray.view - api.extensions.NDArrayBackedExtensionArray.copy - api.extensions.NDArrayBackedExtensionArray.delete - api.extensions.NDArrayBackedExtensionArray.ravel - api.extensions.NDArrayBackedExtensionArray.repeat - api.extensions.NDArrayBackedExtensionArray.reshape - api.extensions.NDArrayBackedExtensionArray.swapaxes - api.extensions.NDArrayBackedExtensionArray.transpose Additionally, we have some utility methods for ensuring your object behaves correctly. diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index a40be5a988f26..80dcb8734f65a 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -112,6 +112,13 @@ def _validate_scalar(self, value): # ------------------------------------------------------------------------ def view(self, dtype: Dtype | None = None) -> ArrayLike: + """ + Return a view on the array. + + See also + -------- + pandas.api.extensions.ExtensionArray.view + """ # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. Everything else we pass through to the underlying # ndarray. @@ -152,6 +159,14 @@ def take( fill_value: Any = None, axis: int = 0, ) -> NDArrayBackedExtensionArrayT: + """ + Take elements from an array. 
+ + See also + -------- + pandas.api.extensions.ExtensionArray.take + """ + if allow_fill: fill_value = self._validate_scalar(fill_value) From 2919f6051108ffa6c135c8630a65e34b9d30c940 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 25 Jan 2022 09:33:10 -0600 Subject: [PATCH 08/28] fix docstrings --- pandas/core/arrays/_mixins.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 80dcb8734f65a..9494aa29d3fcc 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -115,7 +115,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: """ Return a view on the array. - See also + Returns + ------- + ExtensionArray or np.ndarray + A view on the :class:`ExtensionArray`'s data. + + See Also -------- pandas.api.extensions.ExtensionArray.view """ @@ -162,7 +167,11 @@ def take( """ Take elements from an array. - See also + Returns + ------- + NDArrayBackedExtensionArray + + See Also -------- pandas.api.extensions.ExtensionArray.take """ From 319ac2b5f93c282042e64e70f666c35cce86c913 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 26 Jan 2022 10:11:10 -0600 Subject: [PATCH 09/28] use doc decorator --- pandas/core/arrays/_mixins.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 9494aa29d3fcc..3c4ecc1418c42 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -111,19 +111,8 @@ def _validate_scalar(self, value): # ------------------------------------------------------------------------ + @doc(ExtensionArray.view) def view(self, dtype: Dtype | None = None) -> ArrayLike: - """ - Return a view on the array. - - Returns - ------- - ExtensionArray or np.ndarray - A view on the :class:`ExtensionArray`'s data. 
- - See Also - -------- - pandas.api.extensions.ExtensionArray.view - """ # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. Everything else we pass through to the underlying # ndarray. @@ -156,6 +145,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" return arr.view(dtype=dtype) # type: ignore[arg-type] + @doc(ExtensionArray.view) def take( self: NDArrayBackedExtensionArrayT, indices: TakeIndexer, @@ -164,18 +154,6 @@ def take( fill_value: Any = None, axis: int = 0, ) -> NDArrayBackedExtensionArrayT: - """ - Take elements from an array. - - Returns - ------- - NDArrayBackedExtensionArray - - See Also - -------- - pandas.api.extensions.ExtensionArray.take - """ - if allow_fill: fill_value = self._validate_scalar(fill_value) From 8513863c6c4334570ef27add0d49c9d135928f2a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 26 Jan 2022 10:37:05 -0600 Subject: [PATCH 10/28] add code samples and reference to test suite --- doc/source/development/extending.rst | 35 +++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 20bd1604afde7..5a6cc7b45e2b5 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -140,7 +140,40 @@ and comments contain guidance for properly implementing the interface. For ExtensionArrays backed by a single NumPy array, the :class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you some effort. It contains a private property ``_ndarray`` with the backing NumPy -array and implements the extension array interface. +array and implements the extension array interface. Implement the ``_box_func`` +method to convert from array values to the type you wish to expose to users. 
+Implement the ``_validate_scalar`` method to convert from an object to a value +which can be stored in the NumPy array. + +.. code-block:: python + + class CustomArray(NDArrayBackedExtensionArray): + def __init__(self, values): + backing_array_dtype = "int64" + super().__init__(values=values, dtype=backing_array_dtype) + + def _box_func(self, value): + scalar = CustomObject(value) + return scalar + + def _validate_scalar(self, scalar): + if not isinstance(scalar, CustomObject): + raise TypeError("can't convert scalar of this type") + return scalar.convert_to_int64() + +Optionally, subclass :class:`pandas.tests.extension.base.NDArrayBacked2DTests` +in your test suite to validate your implementation. + +.. code-block:: python + + @pytest.fixture + def data(): + return CustomArray(numpy.arange(-10, 10, 1) + + + class Test2DCompat(base.NDArrayBacked2DTests): + pass + .. _extending.extension.operator: From cc75eda45c173b34380880321293c7e0b1b8ea00 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 6 Apr 2022 16:42:03 -0500 Subject: [PATCH 11/28] add missing methods to extension docs --- doc/source/development/extending.rst | 87 +++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 5a6cc7b45e2b5..3276ca090d20e 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -140,29 +140,81 @@ and comments contain guidance for properly implementing the interface. For ExtensionArrays backed by a single NumPy array, the :class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you some effort. It contains a private property ``_ndarray`` with the backing NumPy -array and implements the extension array interface. Implement the ``_box_func`` -method to convert from array values to the type you wish to expose to users. 
-Implement the ``_validate_scalar`` method to convert from an object to a value -which can be stored in the NumPy array. +array and implements the extension array interface. + +Implement the following: + +``_box_func`` + Convert from array values to the type you wish to expose to users. + +``_internal_fill_value`` + Scalar used to denote ``NA`` value inside our ``self._ndarray``, e.g. ``-1`` + for ``Categorical``, ``iNaT`` for ``Period``. + +``_validate_scalar`` + Convert from an object to a value which can be stored in the NumPy array. + +``_validate_setitem_value`` + Convert a value or values for use in setting a value or values in the backing + NumPy array. + +``_validate_searchsorted_value`` + Convert a value for use in searching for a value in the backing NumPy array. .. code-block:: python - class CustomArray(NDArrayBackedExtensionArray): + class DateArray(NDArrayBackedExtensionArray): + _internal_fill_value = numpy.datetime64("NaT") + def __init__(self, values): - backing_array_dtype = "int64" + backing_array_dtype = " Date: Fri, 26 Aug 2022 17:30:21 -0500 Subject: [PATCH 12/28] clarify _validate_searchsorted_value and 2d backing array --- doc/source/development/extending.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 17e761daf3f89..b74713e7102dc 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -161,6 +161,8 @@ Implement the following: ``_validate_searchsorted_value`` Convert a value for use in searching for a value in the backing NumPy array. + Note: in most cases, the implementation can be identical to that of + ``_validate_setitem_value``. .. code-block:: python @@ -196,7 +198,7 @@ Implement the following: To support 2D arrays, use the ``_from_backing_data`` helper function when a -method is called on multi-dimensional data. +method is called on multi-dimensional data of the same dtype as ``_ndarray``. 
.. code-block:: python From 38018e618428538e1ab79f03d4da194d53653551 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 23 Nov 2022 09:35:51 -0600 Subject: [PATCH 13/28] DOC: make insert docstring have single line summary --- pandas/core/arrays/_mixins.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 186526ac99227..1ceffac10dc11 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -390,8 +390,9 @@ def insert( self: NDArrayBackedExtensionArrayT, loc: int, item ) -> NDArrayBackedExtensionArrayT: """ - Make new ExtensionArray inserting new item at location. Follows - Python list.append semantics for negative values. + Make new ExtensionArray inserting new item at location. + + Follows Python list.append semantics for negative values. Parameters ---------- From a5ac8bad9d7b0197ef61324b083702e7c4fb20bf Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 6 Jan 2024 21:09:43 +0000 Subject: [PATCH 14/28] datearray changes --- doc/source/development/extending.rst | 5 ----- doc/source/whatsnew/v2.0.0.rst | 2 -- 2 files changed, 7 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index b74713e7102dc..9f1a427744379 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -159,11 +159,6 @@ Implement the following: Convert a value or values for use in setting a value or values in the backing NumPy array. -``_validate_searchsorted_value`` - Convert a value for use in searching for a value in the backing NumPy array. - Note: in most cases, the implementation can be identical to that of - ``_validate_setitem_value``. - .. 
code-block:: python class DateArray(NDArrayBackedExtensionArray): diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index f2150c609d3db..58c491f770cba 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -65,8 +65,6 @@ Other enhancements - :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`) - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) - Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`) -- :class:`NDArrayBackedExtensionArray` now exposed in the public API. (:issue:`45544`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_200.notable_bug_fixes: From 0e674d4c33dc9b97b66e6f30739aa93e284f7953 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 6 Jan 2024 21:25:13 +0000 Subject: [PATCH 15/28] whatsnew --- doc/source/whatsnew/v2.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 0b04a1d313a6d..f4a0726ef8630 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -329,6 +329,7 @@ Other enhancements - Improved error message that appears in :meth:`DatetimeIndex.to_period` with frequencies which are not supported as period frequencies, such as ``"BMS"`` (:issue:`56243`) - Improved error message when constructing :class:`Period` with invalid offsets such as ``"QS"`` (:issue:`55785`) - The dtypes ``string[pyarrow]`` and ``string[pyarrow_numpy]`` now both utilize the ``large_string`` type from PyArrow to avoid overflow for long columns (:issue:`56259`) +- :class:`NDArrayBackedExtensionArray` now exposed in the public API (:issue:`45544`) .. --------------------------------------------------------------------------- .. 
_whatsnew_220.notable_bug_fixes: From f7e353ae047f2a0517b46c1dcbdb7efb4ec05c21 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 6 Jan 2024 22:07:08 +0000 Subject: [PATCH 16/28] test ndbacked is part of api --- pandas/tests/api/test_api.py | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 6b8b716ba1673..2a8809903ca78 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -336,6 +336,7 @@ class TestApi(Base): "take", "ExtensionArray", "ExtensionScalarOpsMixin", + "NDArrayBackedExtensionArray", ] def test_api(self): @@ -356,34 +357,6 @@ def test_api_indexers(self): def test_api_extensions(self): self.check(api_extensions, self.allowed_api_extensions) - -class TestExtensions(Base): - # top-level classes - classes = [ - "ExtensionDtype", - "ExtensionArray", - "ExtensionScalarOpsMixin", - "NDArrayBackedExtensionArray", - ] - - # top-level functions - funcs = [ - "register_extension_dtype", - "register_dataframe_accessor", - "register_index_accessor", - "register_series_accessor", - "take", - ] - - # misc - misc = ["no_default"] - - def test_api(self): - checkthese = self.classes + self.funcs + self.misc - - self.check(namespace=extensions, expected=checkthese) - - class TestTesting(Base): funcs = [ "assert_frame_equal", From 01191d169059850bf2c1adabe2c5d09c4495d96f Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 7 Jan 2024 10:19:03 +0000 Subject: [PATCH 17/28] docstrings, extending docs --- doc/source/development/extending.rst | 5 +--- pandas/core/arrays/_mixins.py | 34 +++++++--------------------- pandas/tests/api/test_api.py | 1 + 3 files changed, 10 insertions(+), 30 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 41b8a73def3fe..97ed70faa35bc 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -185,12 +185,9 @@ Implement the 
following: def _validate_setitem_value(self, value): if pandas.api.types.is_list_like(value): - return [self._validate_scalar(v) for v in value] + return np.array([self._validate_scalar(v) for v in value], dtype = self.dtype) return self._validate_scalar(value) - def _validate_searchsorted_value(self, value): - return self._validate_setitem_value(value) - To support 2D arrays, use the ``_from_backing_data`` helper function when a method is called on multi-dimensional data of the same dtype as ``_ndarray``. diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 6a307fa7fd3c2..df326d884992b 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -92,6 +92,12 @@ def method(self, *args, **kwargs): class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): """ ExtensionArray that is backed by a single NumPy ndarray. + + Examples + -------- + Please see the following: + + https://pandas.pydata.org/docs/development/extending.html#NDArrayBackedExtensionArray """ _ndarray: np.ndarray @@ -442,21 +448,8 @@ def _where(self: Self, mask: npt.NDArray[np.bool_], value) -> Self: # ------------------------------------------------------------------------ # Index compat methods + @doc(ExtensionArray.insert) def insert(self, loc: int, item) -> Self: - """ - Make new ExtensionArray inserting new item at location. - - Follows Python list.append semantics for negative values. - - Parameters - ---------- - loc : int - item : object - - Returns - ------- - type(self) - """ loc = validate_insert_loc(loc, len(self)) code = self._validate_scalar(item) @@ -475,19 +468,8 @@ def insert(self, loc: int, item) -> Self: # These are not part of the EA API, but we implement them because # pandas assumes they're there. + @doc(ExtensionArray.value_counts) def value_counts(self, dropna: bool = True) -> Series: - """ - Return a Series containing counts of unique values. 
- - Parameters - ---------- - dropna : bool, default True - Don't include counts of NA values. - - Returns - ------- - Series - """ if self.ndim != 1: raise NotImplementedError diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2a8809903ca78..d80d9f564c478 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -357,6 +357,7 @@ def test_api_indexers(self): def test_api_extensions(self): self.check(api_extensions, self.allowed_api_extensions) + class TestTesting(Base): funcs = [ "assert_frame_equal", From ce4eeef166649fe85dac086541efaee77152e84f Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 7 Jan 2024 10:23:49 +0000 Subject: [PATCH 18/28] whatsnew --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index f4a0726ef8630..639910a613bfe 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -313,6 +313,7 @@ Other enhancements - :meth:`~DataFrame.to_sql` with method parameter set to ``multi`` works with Oracle on the backend - :attr:`Series.attrs` / :attr:`DataFrame.attrs` now uses a deepcopy for propagating ``attrs`` (:issue:`54134`). 
+- :class:`NDArrayBackedExtensionArray` now exposed in the public API (:issue:`45544`) - :func:`get_dummies` now returning extension dtypes ``boolean`` or ``bool[pyarrow]`` that are compatible with the input dtype (:issue:`56273`) - :func:`read_csv` now supports ``on_bad_lines`` parameter with ``engine="pyarrow"`` (:issue:`54480`) - :func:`read_sas` returns ``datetime64`` dtypes with resolutions better matching those stored natively in SAS, and avoids returning object-dtype in cases that cannot be stored with ``datetime64[ns]`` dtype (:issue:`56127`) @@ -329,7 +330,6 @@ Other enhancements - Improved error message that appears in :meth:`DatetimeIndex.to_period` with frequencies which are not supported as period frequencies, such as ``"BMS"`` (:issue:`56243`) - Improved error message when constructing :class:`Period` with invalid offsets such as ``"QS"`` (:issue:`55785`) - The dtypes ``string[pyarrow]`` and ``string[pyarrow_numpy]`` now both utilize the ``large_string`` type from PyArrow to avoid overflow for long columns (:issue:`56259`) -- :class:`NDArrayBackedExtensionArray` now exposed in the public API (:issue:`45544`) .. --------------------------------------------------------------------------- .. _whatsnew_220.notable_bug_fixes: From 7019bc7349e00830ba77d01883f10f0bff513dd0 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 13:17:27 +0000 Subject: [PATCH 19/28] docstring --- pandas/core/arrays/_mixins.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index df326d884992b..750db40b7c094 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -468,8 +468,23 @@ def insert(self, loc: int, item) -> Self: # These are not part of the EA API, but we implement them because # pandas assumes they're there. 
- @doc(ExtensionArray.value_counts) def value_counts(self, dropna: bool = True) -> Series: + """ + Return a Series containing counts of each unique value. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of missing values. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ if self.ndim != 1: raise NotImplementedError From 5f99f573f9b95fba9e936283bb6d5757e1e56489 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 13:24:33 +0000 Subject: [PATCH 20/28] aggressive docstring --- pandas/core/arrays/_mixins.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 750db40b7c094..be591b9520fbf 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -93,6 +93,12 @@ class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): """ ExtensionArray that is backed by a single NumPy ndarray. + Notes + ----- + This class is part of the public API, but may be adjusted in non-user-facing ways + more aggressively than the regular API. + + Examples -------- Please see the following: From 47f8917f688f2afc028abf4fb1934e7f21aeb65d Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 13:34:23 +0000 Subject: [PATCH 21/28] lint --- pandas/core/arrays/_mixins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index be591b9520fbf..256fcaa6770d3 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -95,7 +95,7 @@ class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): Notes ----- - This class is part of the public API, but may be adjusted in non-user-facing ways + This class is part of the public API, but may be adjusted in non-user-facing ways more aggressively than the regular API. 
From 4f8c0552300dcc7c501a440f8248b36c22f955b7 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 13:41:55 +0000 Subject: [PATCH 22/28] lint --- pandas/core/arrays/_mixins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 256fcaa6770d3..ca541ec1b0232 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -95,8 +95,8 @@ class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): Notes ----- - This class is part of the public API, but may be adjusted in non-user-facing ways - more aggressively than the regular API. + This class is part of the public API, but may be adjusted in non-user-facing + ways more aggressively than the regular API. Examples From 1aaaa9a1bf473fd0d3541fd0b1ee3aed39f994c9 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 14:45:43 +0000 Subject: [PATCH 23/28] docstrings --- pandas/core/arrays/_mixins.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index ca541ec1b0232..2c7356f7700a1 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -98,7 +98,6 @@ class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): This class is part of the public API, but may be adjusted in non-user-facing ways more aggressively than the regular API. 
- Examples -------- Please see the following: @@ -485,11 +484,7 @@ def value_counts(self, dropna: bool = True) -> Series: Returns ------- - counts : Series - - See Also - -------- - Series.value_counts + Series """ if self.ndim != 1: raise NotImplementedError From 8a66d3ac2208533d427ca1653b644da63aff39f2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 14:52:52 +0000 Subject: [PATCH 24/28] to_numpy example --- pandas/core/arrays/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 58264f2aef6f3..cfecf930e0a2d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -567,6 +567,12 @@ def to_numpy( Returns ------- numpy.ndarray + + Examples + ------- + >>> arr = pd.array([4, 5]) + >>> arr.to_numpy() + array([4, 5], dtype=object) """ result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: From a8fe040debc9f0f6d70311007e642fae289fab29 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 15:42:54 +0000 Subject: [PATCH 25/28] to_numpy example --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cfecf930e0a2d..94a5f8725ff1b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -572,7 +572,7 @@ def to_numpy( ------- >>> arr = pd.array([4, 5]) >>> arr.to_numpy() - array([4, 5], dtype=object) + array([4, 5]) """ result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: From f2cbd4bcbc03fd8f0a0f1d9161dd5bb0f2cab181 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 16:48:50 +0000 Subject: [PATCH 26/28] remove ndarray example --- pandas/core/arrays/base.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 94a5f8725ff1b..58264f2aef6f3 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -567,12 
+567,6 @@ def to_numpy( Returns ------- numpy.ndarray - - Examples - ------- - >>> arr = pd.array([4, 5]) - >>> arr.to_numpy() - array([4, 5]) """ result = np.asarray(self, dtype=dtype) if copy or na_value is not lib.no_default: From 552f7a363ca266300c1d01b4a4baa629fdc17c83 Mon Sep 17 00:00:00 2001 From: Andrew Date: Sat, 13 Jan 2024 17:00:05 +0000 Subject: [PATCH 27/28] value_counts example --- pandas/core/arrays/_mixins.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 2c7356f7700a1..81a56cc0f7876 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -485,6 +485,14 @@ def value_counts(self, dropna: bool = True) -> Series: Returns ------- Series + + Examples + -------- + >>> arr = pd.array([4, 5]) + >>> arr.value_counts() + 4 1 + 5 1 + Name: count, dtype: Int64 """ if self.ndim != 1: raise NotImplementedError From 6ae423d7f7716d7f2814f6cbb68f6821871da96b Mon Sep 17 00:00:00 2001 From: Andrew Date: Sun, 28 Jan 2024 11:36:36 +0000 Subject: [PATCH 28/28] use base.extensiontests in example --- doc/source/development/extending.rst | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 97ed70faa35bc..d3651a271002b 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -218,16 +218,7 @@ validate your implementation. return CustomArray(numpy.arange(-10, 10, 1) - class Test2DCompat(base.NDArrayBacked2DTests): - pass - - - class TestComparisonOps(base.BaseComparisonOpsTests): - pass - - ... - - class TestSetitem(base.BaseSetitemTests): + class TestCustomArray(base.ExtensionTests): pass