Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 109 additions & 16 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,12 +1185,12 @@ def astype(self, dtype: Dtype, copy: bool = True):
How to handle negative values in `indices`.

* False: negative values in `indices` indicate positional indices
from the right (the default). This is similar to
:func:`numpy.take`.
from the right (the default). This is similar to
:func:`numpy.take`.

* True: negative values in `indices` indicate
missing values. These values are set to `fill_value`. Any other
other negative values raise a ``ValueError``.
missing values. These values are set to `fill_value`. Any other
other negative values raise a ``ValueError``.

fill_value : scalar, default None
If allow_fill=True and fill_value is not None, indices specified by
Expand All @@ -1216,7 +1216,6 @@ def astype(self, dtype: Dtype, copy: bool = True):
Index(['c', 'c', 'b', 'c'], dtype='str')
"""

@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(
self,
indices,
Expand All @@ -1225,6 +1224,51 @@ def take(
fill_value=None,
**kwargs,
) -> Self:
"""
Return a new Index of the values selected by the indices.

For internal compatibility with numpy arrays.

Parameters
----------
indices : array-like
Indices to be taken.
axis : int, optional
The axis over which to select values, always 0.
allow_fill : bool, default True
How to handle negative values in `indices`.

* False: negative values in `indices` indicate positional indices
from the right (the default). This is similar to
:func:`numpy.take`.

* True: negative values in `indices` indicate
missing values. These values are set to `fill_value`. Any
other negative values raise a ``ValueError``.

fill_value : scalar, default None
If allow_fill=True and fill_value is not None, indices specified by
-1 are regarded as NA. If Index doesn't hold NA, raise ValueError.
**kwargs
Required for compatibility with numpy.

Returns
-------
Index
An index formed of elements at the given indices. Will be the same
type as self, except for RangeIndex.

See Also
--------
numpy.ndarray.take: Return an array formed from the
elements of a at the given indices.

Examples
--------
>>> idx = pd.Index(["a", "b", "c"])
>>> idx.take([2, 2, 1, 2])
Index(['c', 'c', 'b', 'c'], dtype='str')
"""
if kwargs:
nv.validate_take((), kwargs)
if is_scalar(indices):
Expand Down Expand Up @@ -1272,26 +1316,27 @@ def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
allow_fill = False
return allow_fill

_index_shared_docs["repeat"] = """
Repeat elements of a %(klass)s.
def repeat(self, repeats, axis: None = None) -> Self:
"""
Repeat elements of a Index.

Returns a new %(klass)s where each element of the current %(klass)s
Returns a new Index where each element of the current Index
is repeated consecutively a given number of times.

Parameters
----------
repeats : int or array of ints
The number of repetitions for each element. This should be a
non-negative integer. Repeating 0 times will return an empty
%(klass)s.
Index.
axis : None
Must be ``None``. Has no effect but is accepted for compatibility
with numpy.

Returns
-------
%(klass)s
Newly created %(klass)s with repeated elements.
Index
Newly created Index with repeated elements.

See Also
--------
Expand All @@ -1300,17 +1345,14 @@ def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:

Examples
--------
>>> idx = pd.Index(['a', 'b', 'c'])
>>> idx = pd.Index(["a", "b", "c"])
>>> idx
Index(['a', 'b', 'c'], dtype='object')
>>> idx.repeat(2)
Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
>>> idx.repeat([1, 2, 3])
Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
"""

@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
def repeat(self, repeats, axis: None = None) -> Self:
repeats = ensure_platform_int(repeats)
nv.validate_repeat((), {"axis": axis})
res_values = self._values.repeat(repeats)
Expand Down Expand Up @@ -5993,10 +6035,61 @@ def _should_fallback_to_positional(self) -> bool:
(array([-1, 1, 3, 4, -1]), array([0, 2]))
"""

@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(
self, target
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
"""
Compute indexer and mask for new index given the current index.

The indexer should be then used as an input to ndarray.take to align the
current data to the new index.

Parameters
----------
target : Index
An iterable containing the values to be used for computing indexer.

Returns
-------
indexer : np.ndarray[np.intp]
Integers from 0 to n - 1 indicating that the index at these
positions matches the corresponding target values. Missing values
in the target are marked by -1.
missing : np.ndarray[np.intp]
An indexer into the target of the values not found.
These correspond to the -1 in the indexer array.

See Also
--------
Index.get_indexer : Computes indexer and mask for new index given
the current index.
Index.get_indexer_for : Returns an indexer even when non-unique.

Examples
--------
>>> index = pd.Index(["c", "b", "a", "b", "b"])
>>> index.get_indexer_non_unique(["b", "b"])
(array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))

In the example below there are no matched values.

>>> index = pd.Index(["c", "b", "a", "b", "b"])
>>> index.get_indexer_non_unique(["q", "r", "t"])
(array([-1, -1, -1]), array([0, 1, 2]))

For this reason, the returned ``indexer`` contains only integers equal to -1.
It demonstrates that there's no match between the index and the ``target``
values at these positions. The mask [0, 1, 2] in the return value shows that
the first, second, and third elements are missing.

Notice that the return value is a tuple contains two items. In the example
below the first item is an array of locations in ``index``. The second
item is a mask shows that the first and third elements are missing.

>>> index = pd.Index(["c", "b", "a", "b", "b"])
>>> index.get_indexer_non_unique(["f", "b", "s"])
(array([-1, 1, 3, 4, -1]), array([0, 2]))
"""
target = self._maybe_cast_listlike_indexer(target)

if not self._should_compare(target) and not self._should_partial_index(target):
Expand Down
Loading