From 9c87031d52da34d6344d6bba8ac472eb289dd012 Mon Sep 17 00:00:00 2001 From: Talyahav17 <122670502+Talyahav17@users.noreply.github.com> Date: Sun, 14 Sep 2025 17:08:27 +0300 Subject: [PATCH 1/5] DOC: clarify Series.map behavior for categorical dtype --- pandas/core/arrays/categorical.py | 5 +++++ pandas/core/series.py | 34 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4b5d2acf008a8..303083d0ff447 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1551,6 +1551,11 @@ def map( Series.apply : Apply more complex functions on a :class:`~pandas.Series`. + Notes + ----- + The mapping function is applied to the categories, not to + each element of the array. + Examples -------- >>> cat = pd.Categorical(["a", "b", "c"]) diff --git a/pandas/core/series.py b/pandas/core/series.py index 56ef313d1a73a..70b0c23bf5c53 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4389,6 +4389,12 @@ def map( provides a method for default values), then this default is used rather than ``NaN``. + When the Series has ``dtype="category"``, the function is applied + to the categories and not to each individual value. This means + that if the same category appears multiple times, the function is + only called once for that category, and the result is reused for + all occurrences. Missing values (NaN) are not passed to the function. 
+ Examples -------- >>> s = pd.Series(["cat", "dog", np.nan, "rabbit"]) @@ -4428,6 +4434,34 @@ def map( 2 NaN 3 I am a rabbit dtype: object + + For categorical data, the function is only applied to the categories: + + >>> s = pd.Series(list("cabaa")) + >>> s.map(print) + c + a + b + a + a + 0 None + 1 None + 2 None + 3 None + 4 None + dtype: object + + >>> s_cat = s.astype("category") + >>> s_cat.map(print) # function called once per unique category + a + b + c + 0 None + 1 None + 2 None + 3 None + 4 None + dtype: object """ if func is None: if "arg" in kwargs: From 482d504bb3b859fa9dd086ff48c59183d1cb97ef Mon Sep 17 00:00:00 2001 From: Talyahav17 <122670502+Talyahav17@users.noreply.github.com> Date: Sun, 14 Sep 2025 18:33:01 +0300 Subject: [PATCH 2/5] ci: retrigger From edc9b23e6df7e2997691a9696fbbb2f35673d9d9 Mon Sep 17 00:00:00 2001 From: Talyahav17 <122670502+Talyahav17@users.noreply.github.com> Date: Tue, 16 Sep 2025 13:34:04 +0300 Subject: [PATCH 3/5] =?UTF-8?q?DOC:=20address=20review=20=E2=80=94=20move?= =?UTF-8?q?=20categorical=20map=20note=20to=20Examples=20and=20trim=20Seri?= =?UTF-8?q?es.map=20notes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/core/arrays/categorical.py | 20 +++++++++++++++----- pandas/core/series.py | 6 +----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 303083d0ff447..baf380cb25b7e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1551,11 +1551,6 @@ def map( Series.apply : Apply more complex functions on a :class:`~pandas.Series`. - Notes - ----- - The mapping function is applied to the categories, not to - each element of the array. 
- Examples -------- >>> cat = pd.Categorical(["a", "b", "c"]) @@ -1590,6 +1585,21 @@ def map( >>> cat.map({"a": "first", "b": "second"}, na_action=None) Index(['first', 'second', nan], dtype='str') + + The mapping function is applied to categories, not to each value. It is + therefore only called once per unique category, and the result reused for + all occurrences: + + >>> cat = pd.Categorical(["a", "a", "b"]) # doctest: +SKIP + >>> calls = [] # doctest: +SKIP + >>> def f(x): # doctest: +SKIP + ... calls.append(x) + ... return x.upper() + >>> cat.map(f) + ['A', 'A', 'B'] + Categories (2, str): ['A', 'B'] + >>> calls # doctest: +SKIP + ['a', 'b'] """ assert callable(mapper) or is_dict_like(mapper) diff --git a/pandas/core/series.py b/pandas/core/series.py index 70b0c23bf5c53..b8dce68479738 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4389,11 +4389,7 @@ def map( provides a method for default values), then this default is used rather than ``NaN``. - When the Series has ``dtype="category"``, the function is applied - to the categories and not to each individual value. This means - that if the same category appears multiple times, the function is - only called once for that category, and the result is reused for - all occurrences. Missing values (NaN) are not passed to the function. 
+ Examples -------- From 5e348a1995c5067c5355a5eed627f8843f9a5335 Mon Sep 17 00:00:00 2001 From: Talyahav17 <122670502+Talyahav17@users.noreply.github.com> Date: Sun, 21 Sep 2025 11:25:50 +0300 Subject: [PATCH 4/5] DOC: fix doctest in categorical.map by removing SKIP markers --- pandas/core/arrays/categorical.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index baf380cb25b7e..a61f9498d52c7 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1590,15 +1590,16 @@ def map( therefore only called once per unique category, and the result reused for all occurrences: - >>> cat = pd.Categorical(["a", "a", "b"]) # doctest: +SKIP - >>> calls = [] # doctest: +SKIP - >>> def f(x): # doctest: +SKIP + >>> cat = pd.Categorical(["a", "a", "b"]) + >>> calls = [] + >>> def f(x): ... calls.append(x) ... return x.upper() - >>> cat.map(f) + >>> result = cat.map(f) + >>> result ['A', 'A', 'B'] Categories (2, str): ['A', 'B'] - >>> calls # doctest: +SKIP + >>> calls ['a', 'b'] """ assert callable(mapper) or is_dict_like(mapper) From 1dd172e1a3dc58415fbd9b3ff9780899a01e307c Mon Sep 17 00:00:00 2001 From: Talyahav17 <122670502+Talyahav17@users.noreply.github.com> Date: Sun, 21 Sep 2025 11:32:39 +0300 Subject: [PATCH 5/5] FIX: remove whitespace from blank line in series.py --- pandas/core/series.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index f82d0745afca7..3029296dad887 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4380,8 +4380,6 @@ def map( provides a method for default values), then this default is used rather than ``NaN``. - - Examples -------- >>> s = pd.Series(["cat", "dog", np.nan, "rabbit"])