diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 016e553cf2092..470129d6d860b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1001,6 +1001,7 @@ Numeric - Bug in :meth:`Series.dot` returning ``object`` dtype for :class:`ArrowDtype` and nullable-dtype data (:issue:`61375`) - Bug in :meth:`Series.std` and :meth:`Series.var` when using complex-valued data (:issue:`61645`) - Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`) +- Bug in arithmetic operations between objects with numpy-nullable dtype and :class:`ArrowDtype` incorrectly raising (:issue:`58602`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 57efde1a928bc..cdba53662e6fa 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -37,7 +37,10 @@ is_string_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import BaseMaskedDtype +from pandas.core.dtypes.dtypes import ( + ArrowDtype, + BaseMaskedDtype, +) from pandas.core.dtypes.missing import ( array_equivalent, is_valid_na_for_dtype, @@ -767,6 +770,10 @@ def _arith_method(self, other, op): pd_op = ops.get_array_op(op) other = ensure_wrapped_if_datetimelike(other) + if isinstance(other, ExtensionArray) and isinstance(other.dtype, ArrowDtype): + # GH#58602 + return NotImplemented + if op_name in {"pow", "rpow"} and isinstance(other, np.bool_): # Avoid DeprecationWarning: In future, it will be an error # for 'np.bool_' scalars to be interpreted as an index @@ -843,7 +850,11 @@ def _cmp_method(self, other, op) -> BooleanArray: mask = None - if isinstance(other, BaseMaskedArray): + if isinstance(other, ExtensionArray) and isinstance(other.dtype, ArrowDtype): + # GH#58602 + return NotImplemented + + elif isinstance(other, BaseMaskedArray): other, mask = other._data, other._mask elif is_list_like(other): diff --git a/pandas/core/series.py b/pandas/core/series.py index 
f3aaee26fe470..e92a17fd98def 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -237,105 +237,36 @@ @set_module("pandas") class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] """ - One-dimensional ndarray with axis labels (including time series). - - Labels need not be unique but must be a hashable type. The object - supports both integer- and label-based indexing and provides a host of - methods for performing operations involving the index. Statistical - methods from ndarray have been overridden to automatically exclude - missing data (currently represented as NaN). - - Operations between Series (+, -, /, \\*, \\*\\*) align values based on their - associated index values-- they need not be the same length. The result - index will be the sorted union of the two indexes. - - Parameters - ---------- - data : array-like, Iterable, dict, or scalar value - Contains data stored in Series. If data is a dict, argument order is - maintained. Unordered sets are not supported. - index : array-like or Index (1d) - Values must be hashable and have the same length as `data`. - Non-unique index values are allowed. Will default to - RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like - and index is None, then the keys in the data are used as the index. If the - index is not None, the resulting Series is reindexed with the index values. - dtype : str, numpy.dtype, or ExtensionDtype, optional - Data type for the output Series. If not specified, this will be - inferred from `data`. - See the :ref:`user guide ` for more usages. - name : Hashable, default None - The name to give to the Series. - copy : bool, default False - Copy input data. Only affects Series or 1d ndarray input. See examples. - - See Also - -------- - DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data. - Index : Immutable sequence used for indexing and alignment. - - Notes - ----- - Please reference the :ref:`User Guide ` for more information. 
- - Examples - -------- - Constructing Series from a dictionary with an Index specified - - >>> d = {"a": 1, "b": 2, "c": 3} - >>> ser = pd.Series(data=d, index=["a", "b", "c"]) - >>> ser - a 1 - b 2 - c 3 - dtype: int64 - - The keys of the dictionary match with the Index values, hence the Index - values have no effect. - - >>> d = {"a": 1, "b": 2, "c": 3} - >>> ser = pd.Series(data=d, index=["x", "y", "z"]) - >>> ser - x NaN - y NaN - z NaN - dtype: float64 - - Note that the Index is first built with the keys from the dictionary. - After this the Series is reindexed with the given Index values, hence we - get all NaN as a result. - - Constructing Series from a list with `copy=False`. - - >>> r = [1, 2] - >>> ser = pd.Series(r, copy=False) - >>> ser.iloc[0] = 999 - >>> r - [1, 2] - >>> ser - 0 999 - 1 2 - dtype: int64 - - Due to input data type the Series has a `copy` of - the original data even though `copy=False`, so - the data is unchanged. - - Constructing Series from a 1d ndarray with `copy=False`. - - >>> r = np.array([1, 2]) - >>> ser = pd.Series(r, copy=False) - >>> ser.iloc[0] = 999 - >>> r - array([999, 2]) - >>> ser - 0 999 - 1 2 - dtype: int64 - - Due to input data type the Series has a `view` on - the original data, so - the data is changed as well. + One-dimensional ndarray with axis labels (including time series). + +Labels need not be unique but must be hashable (e.g. 1, 'a', or tuple). +Non-unique labels can be accessed multiple times. + +Parameters +---------- +data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. If data is a dict, argument order is + maintained if the Python version is >= 3.7. + + .. versionchanged:: 2.0.0 + Argument order is maintained if the Python version is >= 3.7. +index : array-like or Index (1d) + Values must be hashable and have the same length as `data`. Non-unique + index values are allowed. Default is RangeIndex(0, ..., n). + + ..
versionchanged:: 2.0.0 + Non-unique index values are allowed. +dtype : str, numpy.dtype, or ExtensionDtype, optional + Data type for the Series. If None, dtype will be inferred. +name : str, optional + The name to give to the Series. +copy : bool, default False + Copy input data. + +Returns +------- +pandas.Series + The constructed Series object. """ _typ = "series" diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2949b7ccc7cf3..2aa1b658fdf7b 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3702,6 +3702,28 @@ def test_pow_with_all_na_float(): tm.assert_series_equal(result, expected) +def test_mul_numpy_nullable_with_pyarrow_float(): + # GH#58602: Float64 (numpy-nullable) and float64[pyarrow] operands, both orders + left = pd.Series(range(5), dtype="Float64") + right = pd.Series(range(5), dtype="float64[pyarrow]") + + expected = pd.Series([0, 1, 4, 9, 16], dtype="float64[pyarrow]") + + result = left * right + tm.assert_series_equal(result, expected) + + result2 = right * left + tm.assert_series_equal(result2, expected) + + # comparisons should interoperate the same way: check __eq__ in both orders + result3 = left == right + expected3 = pd.Series([True] * 5, dtype="bool[pyarrow]") + tm.assert_series_equal(result3, expected3) + + result4 = right == left + tm.assert_series_equal(result4, expected3) + + @pytest.mark.parametrize( "type_name, expected_size", [