Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ cdef class BaseMultiIndexCodesEngine:
int_keys : 1-dimensional array of dtype uint64 or object
Integers representing one combination each
"""
level_codes = list(target._recode_for_new_levels(self.levels))
level_codes = list(target._recode_for_new_levels(self.levels, copy=True))
for i, codes in enumerate(level_codes):
if self.levels[i].hasnans:
na_index = self.levels[i].isna().nonzero()[0][0]
Expand Down
16 changes: 9 additions & 7 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,13 +670,15 @@ def _from_inferred_categories(
if known_categories:
# Recode from observation order to dtype.categories order.
categories = dtype.categories
codes = recode_for_categories(inferred_codes, cats, categories)
codes = recode_for_categories(inferred_codes, cats, categories, copy=False)
elif not cats.is_monotonic_increasing:
# Sort categories and recode for unknown categories.
unsorted = cats.copy()
categories = cats.sort_values()

codes = recode_for_categories(inferred_codes, unsorted, categories)
codes = recode_for_categories(
inferred_codes, unsorted, categories, copy=False
)
dtype = CategoricalDtype(categories, ordered=False)
else:
dtype = CategoricalDtype(cats, ordered=False)
Expand Down Expand Up @@ -945,7 +947,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:

super().__init__(self._ndarray, new_dtype)

def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
def _set_dtype(self, dtype: CategoricalDtype, *, copy: bool) -> Self:
"""
Internal method for directly updating the CategoricalDtype

Expand All @@ -959,7 +961,7 @@ def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
a (valid) instance of `CategoricalDtype`.
"""
codes = recode_for_categories(
self.codes, self.categories, dtype.categories, copy
self.codes, self.categories, dtype.categories, copy=copy
)
return type(self)._simple_new(codes, dtype=dtype)

Expand Down Expand Up @@ -1154,7 +1156,7 @@ def set_categories(
codes = cat._codes
else:
codes = recode_for_categories(
cat.codes, cat.categories, new_dtype.categories
cat.codes, cat.categories, new_dtype.categories, copy=False
)
NDArrayBacked.__init__(cat, codes, new_dtype)
return cat
Expand Down Expand Up @@ -3006,7 +3008,7 @@ def _get_codes_for_values(


def recode_for_categories(
codes: np.ndarray, old_categories, new_categories, copy: bool = True
codes: np.ndarray, old_categories, new_categories, *, copy: bool
) -> np.ndarray:
"""
Convert a set of codes to a new set of categories
Expand All @@ -3027,7 +3029,7 @@ def recode_for_categories(
>>> old_cat = pd.Index(["b", "a", "c"])
>>> new_cat = pd.Index(["a", "b"])
>>> codes = np.array([0, 1, 1, 2])
>>> recode_for_categories(codes, old_cat, new_cat)
>>> recode_for_categories(codes, old_cat, new_cat, copy=True)
array([ 1, 0, 0, -1], dtype=int8)
"""
if len(old_categories) == 0:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ def _maybe_unwrap(x):
categories = categories.sort_values()

new_codes = [
recode_for_categories(c.codes, c.categories, categories) for c in to_union
recode_for_categories(c.codes, c.categories, categories, copy=False)
for c in to_union
]
new_codes = np.concatenate(new_codes)
else:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica

# we recode according to the uniques
categories = c.categories.take(take_codes)
codes = recode_for_categories(c.codes, c.categories, categories)
codes = recode_for_categories(c.codes, c.categories, categories, copy=False)

# return a new categorical that maps our new codes
# and categories
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2675,7 +2675,7 @@ def _reorder_ilevels(self, order) -> MultiIndex:
)

def _recode_for_new_levels(
self, new_levels, copy: bool = True
self, new_levels, *, copy: bool
) -> Generator[np.ndarray]:
if len(new_levels) > self.nlevels:
raise AssertionError(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/categorical/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def test_recode_to_categories(self, codes, old, new, expected):
expected = np.asanyarray(expected, dtype=np.int8)
old = Index(old)
new = Index(new)
result = recode_for_categories(codes, old, new)
result = recode_for_categories(codes, old, new, copy=True)
tm.assert_numpy_array_equal(result, expected)

def test_recode_to_categories_large(self):
Expand All @@ -489,5 +489,5 @@ def test_recode_to_categories_large(self):
old = Index(codes)
expected = np.arange(N - 1, -1, -1, dtype=np.int16)
new = Index(expected)
result = recode_for_categories(codes, old, new)
result = recode_for_categories(codes, old, new, copy=True)
tm.assert_numpy_array_equal(result, expected)
Loading