From f8937fd693dfa4af8fc87bb93ab5b802abb57c40 Mon Sep 17 00:00:00 2001 From: Lonercode Date: Fri, 24 Oct 2025 17:58:49 +0100 Subject: [PATCH 1/7] Add inline docstrings to functions in series.py --- pandas/core/series.py | 416 +++++++++++++++++++++++++++++++----------- 1 file changed, 310 insertions(+), 106 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index f3aaee26fe470..df7f515e1e22b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1598,37 +1598,6 @@ def to_markdown( **kwargs, ) -> str | None: ... - @doc( - klass=_shared_doc_kwargs["klass"], - storage_options=_shared_docs["storage_options"], - examples=dedent( - """Examples - -------- - >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") - >>> print(s.to_markdown()) - | | animal | - |---:|:---------| - | 0 | elk | - | 1 | pig | - | 2 | dog | - | 3 | quetzal | - - Output markdown with a tabulate option. - - >>> print(s.to_markdown(tablefmt="grid")) - +----+----------+ - | | animal | - +====+==========+ - | 0 | elk | - +----+----------+ - | 1 | pig | - +----+----------+ - | 2 | dog | - +----+----------+ - | 3 | quetzal | - +----+----------+""" - ), - ) @deprecate_nonkeyword_arguments( Pandas4Warning, allowed_args=["self", "buf"], name="to_markdown" ) @@ -1641,7 +1610,7 @@ def to_markdown( **kwargs, ) -> str | None: """ - Print {klass} in Markdown-friendly format. + Print Series in Markdown-friendly format. Parameters ---------- @@ -1652,7 +1621,15 @@ def to_markdown( index : bool, optional, default True Add index (row) labels. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. 
Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + <https://pandas.pydata.org/docs/user_guide/io.html? + highlight=storage_options#reading-writing-remote-files>`_. **kwargs These parameters will be passed to `tabulate \ @@ -1661,7 +1638,7 @@ def to_markdown( Returns ------- str - {klass} in Markdown-friendly format. + Series in Markdown-friendly format. See Also -------- @@ -1672,7 +1649,31 @@ def to_markdown( ----- Requires the `tabulate <https://pypi.org/project/tabulate>`_ package. - {examples} + Examples + -------- + >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + + Output markdown with a tabulate option. + + >>> print(s.to_markdown(tablefmt="grid")) + +----+----------+ + | | animal | + +====+==========+ + | 0 | elk | + +----+----------+ + | 1 | pig | + +----+----------+ + | 2 | dog | + +----+----------+ + | 3 | quetzal | + +----+----------+ """ return self.to_frame().to_markdown( buf, mode=mode, index=index, storage_options=storage_options, **kwargs @@ -2750,12 +2751,40 @@ def cov( this_values, other_values, min_periods=min_periods, ddof=ddof ) - @doc( - klass="Series", - extra_params="", - other_klass="DataFrame", - examples=dedent( - """ + def diff(self, periods: int = 1) -> Series: + """ + First discrete difference of element. + + Calculates the difference of a Series element compared with another + element in the Series (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + "" + Returns + ------- + Series + First differences of the Series. + + See Also + -------- + Series.pct_change: Percent change over given number of periods. + Series.shift: Shift index by desired number of periods with an + optional time freq. + DataFrame.diff: First discrete difference of object. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. 
+ The result is calculated according to current dtype in {klass}, + however dtype of the result is always float64. + + Examples + -------- Difference with previous row >>> s = pd.Series([1, 1, 2, 3, 5, 8]) @@ -2796,44 +2825,7 @@ def cov( >>> s.diff() 0 NaN 1 255.0 - dtype: float64""" - ), - ) - def diff(self, periods: int = 1) -> Series: - """ - First discrete difference of element. - - Calculates the difference of a {klass} element compared with another - element in the {klass} (default is element in previous row). - - Parameters - ---------- - periods : int, default 1 - Periods to shift for calculating difference, accepts negative - values. - {extra_params} - Returns - ------- - {klass} - First differences of the Series. - - See Also - -------- - {klass}.pct_change: Percent change over given number of periods. - {klass}.shift: Shift index by desired number of periods with an - optional time freq. - {other_klass}.diff: First discrete difference of object. - - Notes - ----- - For boolean dtypes, this uses :meth:`operator.xor` rather than - :meth:`operator.sub`. - The result is calculated according to current dtype in {klass}, - however dtype of the result is always float64. - - Examples - -------- - {examples} + dtype: float64 """ if not lib.is_integer(periods): if not (is_float(periods) and periods.is_integer()): @@ -2997,10 +2989,44 @@ def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Ser return concat([self, to_append], ignore_index=ignore_index) - @doc( - _shared_docs["compare"], - dedent( - """ + def compare( + self, + other: Series, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: Suffixes = ("self", "other"), + ) -> DataFrame | Series: + + """ + Compare to another Series and show the differences. + + Parameters + ---------- + other : Series + Object to compare with. + + align_axis : {{0 or 'index', 1 or 'columns'}}, default 1 + Determine which axis to align the comparison on. 
+ + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + + keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + + keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + + result_names : tuple, default ('self', 'other') + Set the dataframes names in the comparison. + + .. versionadded:: 1.5.0 + Returns ------- Series or DataFrame @@ -3060,17 +3086,6 @@ def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Ser 3 d b 4 e e """ - ), - klass=_shared_doc_kwargs["klass"], - ) - def compare( - self, - other: Series, - align_axis: Axis = 1, - keep_shape: bool = False, - keep_equal: bool = False, - result_names: Suffixes = ("self", "other"), - ) -> DataFrame | Series: return super().compare( other=other, align_axis=align_axis, @@ -4533,14 +4548,81 @@ def _gotitem(self, key, ndim, subset=None) -> Self: """ ) - @doc( - _shared_docs["aggregate"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - ) def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + """ + Aggregate using one or more operations over the specified axis. + + Parameters + ---------- + func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a Series or when passed to Series.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. 
+ *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + scalar, Series or DataFrame + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + See Also + -------- + Series.apply : Invoke function on a Series. + Series.transform : Transform function producing a Series with like indexes. + + Notes + ----- + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + A passed user-defined-function will be passed a Series for evaluation. + + If ``func`` defines an index relabeling, ``axis`` must be ``0`` or ``index``. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.agg('min') + 1 + + >>> s.agg(['min', 'max']) + min 1 + max 4 + dtype: int64 + """ + # Validate the axis parameter self._get_axis_number(axis) @@ -4554,14 +4636,136 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): agg = aggregate - @doc( - _shared_docs["transform"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs ) -> DataFrame | Series: + """ + Call ``func`` on self producing a Series with the same axis shape as self. 
+ + Parameters + ---------- + func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a Series or when passed to Series.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, function names or list-like of such. + + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + Series + A Series that must have the same length as self. + + Raises + ------ + ValueError : If the returned Series has a different length than self. + + See Also + -------- + Series.agg : Only perform aggregating type operations. + Series.apply : Invoke function on a Series. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({{'A': range(3), 'B': range(1, 4)}}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting Series must have the same length as the + input Series, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + + You can call transform on a GroupBy object: + + >>> df = pd.DataFrame({{ + ... "Date": [ + ... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05", + ... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05"], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... 
}}) + >>> df + Date Data + 0 2015-05-08 5 + 1 2015-05-07 8 + 2 2015-05-06 6 + 3 2015-05-05 1 + 4 2015-05-08 50 + 5 2015-05-07 100 + 6 2015-05-06 60 + 7 2015-05-05 120 + >>> df.groupby('Date')['Data'].transform('sum') + 0 55 + 1 108 + 2 66 + 3 121 + 4 55 + 5 108 + 6 66 + 7 121 + Name: Data, dtype: int64 + + >>> df = pd.DataFrame({{ + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"] + ... }}) + >>> df + c type + 0 1 m + 1 1 n + 2 1 o + 3 2 m + 4 2 m + 5 2 n + 6 2 n + >>> df['size'] = df.groupby('c')['type'].transform(len) + >>> df + c type size + 0 1 m 3 + 1 1 n 3 + 2 1 o 3 + 3 2 m 4 + 4 2 m 4 + 5 2 n 4 + 6 2 n 4 + """ # Validate axis argument self._get_axis_number(axis) ser = self.copy(deep=False) From 4c401e73e5ebc8bf9e8d42464ca3e39c9019a095 Mon Sep 17 00:00:00 2001 From: Lonercode Date: Fri, 24 Oct 2025 18:32:01 +0100 Subject: [PATCH 2/7] run pre-commit --- pandas/core/series.py | 48 ++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index df7f515e1e22b..a77fae0dd929c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2997,7 +2997,6 @@ def compare( keep_equal: bool = False, result_names: Suffixes = ("self", "other"), ) -> DataFrame | Series: - """ Compare to another Series and show the differences. @@ -3026,7 +3025,7 @@ def compare( Set the dataframes names in the comparison. .. 
versionadded:: 1.5.0 - + Returns ------- Series or DataFrame @@ -4614,10 +4613,10 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): 3 4 dtype: int64 - >>> s.agg('min') + >>> s.agg("min") 1 - >>> s.agg(['min', 'max']) + >>> s.agg(["min", "max"]) min 1 max 4 dtype: int64 @@ -4686,7 +4685,7 @@ def transform( Examples -------- - >>> df = pd.DataFrame({{'A': range(3), 'B': range(1, 4)}}) + >>> df = pd.DataFrame({{"A": range(3), "B": range(1, 4)}}) >>> df A B 0 0 1 @@ -4715,12 +4714,23 @@ def transform( You can call transform on a GroupBy object: - >>> df = pd.DataFrame({{ - ... "Date": [ - ... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05", - ... "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05"], - ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], - ... }}) + >>> df = pd.DataFrame( + ... { + ... { + ... "Date": [ + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... ], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... } + ... } + ... ) >>> df Date Data 0 2015-05-08 5 @@ -4731,7 +4741,7 @@ def transform( 5 2015-05-07 100 6 2015-05-06 60 7 2015-05-05 120 - >>> df.groupby('Date')['Data'].transform('sum') + >>> df.groupby("Date")["Data"].transform("sum") 0 55 1 108 2 66 @@ -4742,10 +4752,14 @@ def transform( 7 121 Name: Data, dtype: int64 - >>> df = pd.DataFrame({{ - ... "c": [1, 1, 1, 2, 2, 2, 2], - ... "type": ["m", "n", "o", "m", "m", "n", "n"] - ... }}) + >>> df = pd.DataFrame( + ... { + ... { + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"], + ... } + ... } + ... 
) >>> df c type 0 1 m @@ -4755,7 +4769,7 @@ def transform( 4 2 m 5 2 n 6 2 n - >>> df['size'] = df.groupby('c')['type'].transform(len) + >>> df["size"] = df.groupby("c")["type"].transform(len) >>> df c type size 0 1 m 3 From 616b9f2059a7b1eacd22e6b3bbfda40ebe5d9c8e Mon Sep 17 00:00:00 2001 From: Lonercode Date: Fri, 24 Oct 2025 19:34:38 +0100 Subject: [PATCH 3/7] fix lint errors --- pandas/core/series.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a77fae0dd929c..4d36639b97ad8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3011,6 +3011,7 @@ def compare( * 0, or 'index' : Resulting differences are stacked vertically with rows drawn alternately from self and other. * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. keep_shape : bool, default False @@ -3085,6 +3086,7 @@ def compare( 3 d b 4 e e """ + return super().compare( other=other, align_axis=align_axis, @@ -4653,7 +4655,7 @@ def transform( - function - string function name - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` - - dict-like of axis labels -> functions, function names or list-like of such. + - dict-like of axis labels -> functions, function names or list-like of such axis : {0 or 'index'} Unused. Parameter needed for compatibility with DataFrame. From bd6df1b6af68aaa1493ecfd6a6c81d57d6379f59 Mon Sep 17 00:00:00 2001 From: Lonercode Date: Fri, 24 Oct 2025 20:28:05 +0100 Subject: [PATCH 4/7] fix errors --- pandas/core/series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4d36639b97ad8..14c0d4693d545 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3011,7 +3011,6 @@ def compare( * 0, or 'index' : Resulting differences are stacked vertically with rows drawn alternately from self and other. 
* 1, or 'columns' : Resulting differences are aligned horizontally - with columns drawn alternately from self and other. keep_shape : bool, default False From 75ee35c532846db1e136f6404f17a51ace6a8286 Mon Sep 17 00:00:00 2001 From: Lonercode Date: Fri, 24 Oct 2025 21:35:51 +0100 Subject: [PATCH 5/7] address checks --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 14c0d4693d545..745b6b788d8ca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3009,9 +3009,9 @@ def compare( Determine which axis to align the comparison on. * 0, or 'index' : Resulting differences are stacked vertically - with rows drawn alternately from self and other. + with rows drawn alternately from self and other. * 1, or 'columns' : Resulting differences are aligned horizontally - with columns drawn alternately from self and other. + with columns drawn alternately from self and other. keep_shape : bool, default False If true, all rows and columns are kept. From d0cd3d0d1ed73127c206c171e216885638ed3b98 Mon Sep 17 00:00:00 2001 From: Lonercode Date: Fri, 24 Oct 2025 22:37:42 +0100 Subject: [PATCH 6/7] address CI checks --- pandas/core/series.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 745b6b788d8ca..73375a6d5973c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1622,14 +1622,14 @@ def to_markdown( Add index (row) labels. storage_options : dict, optional - Extra options that make sense for a particular storage connection, e.g. - host, port, username, password, etc. For HTTP(S) URLs the key-value pairs - are forwarded to ``urllib.request.Request`` as header options. For other - URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are - forwarded to ``fsspec.open``. 
Please see ``fsspec`` and ``urllib`` for more - details, and for more examples on storage options refer `here - <https://pandas.pydata.org/docs/user_guide/io.html? - highlight=storage_options#reading-writing-remote-files>`_. + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + <https://pandas.pydata.org/docs/user_guide/io.html? + highlight=storage_options#reading-writing-remote-files>`_. **kwargs These parameters will be passed to `tabulate \ @@ -2763,7 +2763,7 @@ def diff(self, periods: int = 1) -> Series: periods : int, default 1 Periods to shift for calculating difference, accepts negative values. - "" + Returns ------- Series @@ -4657,7 +4657,7 @@ def transform( - dict-like of axis labels -> functions, function names or list-like of such axis : {0 or 'index'} - Unused. Parameter needed for compatibility with DataFrame. + Unused. Parameter needed for compatibility with DataFrame. *args Positional arguments to pass to `func`. From 47256d851ad82643f1e67d546a4fd0806cc9b3fd Mon Sep 17 00:00:00 2001 From: Lonercode Date: Fri, 24 Oct 2025 23:09:48 +0100 Subject: [PATCH 7/7] restored @doc to Series.diff due to dependence in /core/frame.py --- pandas/core/series.py | 79 ++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 73375a6d5973c..f92886b890254 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2751,40 +2751,12 @@ def cov( this_values, other_values, min_periods=min_periods, ddof=ddof ) - def diff(self, periods: int = 1) -> Series: - """ - First discrete difference of element. - - Calculates the difference of a Series element compared with another - element in the Series (default is element in previous row). 
- - Parameters - ---------- - periods : int, default 1 - Periods to shift for calculating difference, accepts negative - values. - - Returns - ------- - Series - First differences of the Series. - - See Also - -------- - Series.pct_change: Percent change over given number of periods. - Series.shift: Shift index by desired number of periods with an - optional time freq. - DataFrame.diff: First discrete difference of object. - - Notes - ----- - For boolean dtypes, this uses :meth:`operator.xor` rather than - :meth:`operator.sub`. - The result is calculated according to current dtype in {klass}, - however dtype of the result is always float64. - - Examples - -------- + @doc( + klass="Series", + extra_params="", + other_klass="DataFrame", + examples=dedent( + """ Difference with previous row >>> s = pd.Series([1, 1, 2, 3, 5, 8]) @@ -2825,7 +2797,44 @@ def diff(self, periods: int = 1) -> Series: >>> s.diff() 0 NaN 1 255.0 - dtype: float64 + dtype: float64""" + ), + ) + def diff(self, periods: int = 1) -> Series: + """ + First discrete difference of element. + + Calculates the difference of a {klass} element compared with another + element in the {klass} (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + {extra_params} + Returns + ------- + {klass} + First differences of the Series. + + See Also + -------- + {klass}.pct_change: Percent change over given number of periods. + {klass}.shift: Shift index by desired number of periods with an + optional time freq. + {other_klass}.diff: First discrete difference of object. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. + The result is calculated according to current dtype in {klass}, + however dtype of the result is always float64. 
+ + Examples + -------- + {examples} """ if not lib.is_integer(periods): if not (is_float(periods) and periods.is_integer()):