diff --git a/pandas/core/series.py b/pandas/core/series.py index f3aaee26fe470..f92886b890254 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1598,37 +1598,6 @@ def to_markdown( **kwargs, ) -> str | None: ... - @doc( - klass=_shared_doc_kwargs["klass"], - storage_options=_shared_docs["storage_options"], - examples=dedent( - """Examples - -------- - >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") - >>> print(s.to_markdown()) - | | animal | - |---:|:---------| - | 0 | elk | - | 1 | pig | - | 2 | dog | - | 3 | quetzal | - - Output markdown with a tabulate option. - - >>> print(s.to_markdown(tablefmt="grid")) - +----+----------+ - | | animal | - +====+==========+ - | 0 | elk | - +----+----------+ - | 1 | pig | - +----+----------+ - | 2 | dog | - +----+----------+ - | 3 | quetzal | - +----+----------+""" - ), - ) @deprecate_nonkeyword_arguments( Pandas4Warning, allowed_args=["self", "buf"], name="to_markdown" ) @@ -1641,7 +1610,7 @@ def to_markdown( **kwargs, ) -> str | None: """ - Print {klass} in Markdown-friendly format. + Print Series in Markdown-friendly format. Parameters ---------- @@ -1652,7 +1621,15 @@ def to_markdown( index : bool, optional, default True Add index (row) labels. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + <https://pandas.pydata.org/docs/user_guide/io.html? + highlight=storage_options#reading-writing-remote-files>`_. **kwargs These parameters will be passed to `tabulate \ @@ -1661,7 +1638,7 @@ def to_markdown( Returns ------- str - {klass} in Markdown-friendly format. + Series in Markdown-friendly format. 
See Also -------- @@ -1672,7 +1649,31 @@ def to_markdown( ----- Requires the `tabulate <https://pypi.org/project/tabulate>`_ package. - {examples} + Examples + -------- + >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal") + >>> print(s.to_markdown()) + | | animal | + |---:|:---------| + | 0 | elk | + | 1 | pig | + | 2 | dog | + | 3 | quetzal | + + Output markdown with a tabulate option. + + >>> print(s.to_markdown(tablefmt="grid")) + +----+----------+ + | | animal | + +====+==========+ + | 0 | elk | + +----+----------+ + | 1 | pig | + +----+----------+ + | 2 | dog | + +----+----------+ + | 3 | quetzal | + +----+----------+ """ return self.to_frame().to_markdown( buf, mode=mode, index=index, storage_options=storage_options, **kwargs @@ -2997,10 +2998,43 @@ def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Ser return concat([self, to_append], ignore_index=ignore_index) - @doc( - _shared_docs["compare"], - dedent( - """ + def compare( + self, + other: Series, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: Suffixes = ("self", "other"), + ) -> DataFrame | Series: + """ + Compare to another Series and show the differences. + + Parameters + ---------- + other : Series + Object to compare with. + + align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + + keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + + keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + + result_names : tuple, default ('self', 'other') + Set the dataframes names in the comparison. + + .. 
versionadded:: 1.5.0 + Returns ------- Series or DataFrame @@ -3060,17 +3094,7 @@ def _append_internal(self, to_append: Series, ignore_index: bool = False) -> Ser 3 d b 4 e e """ - ), - klass=_shared_doc_kwargs["klass"], - ) - def compare( - self, - other: Series, - align_axis: Axis = 1, - keep_shape: bool = False, - keep_equal: bool = False, - result_names: Suffixes = ("self", "other"), - ) -> DataFrame | Series: + return super().compare( other=other, align_axis=align_axis, @@ -4533,14 +4557,81 @@ def _gotitem(self, key, ndim, subset=None) -> Self: """ ) - @doc( - _shared_docs["aggregate"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - ) def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + """ + Aggregate using one or more operations over the specified axis. + + Parameters + ---------- + func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a Series or when passed to Series.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + scalar, Series or DataFrame + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + See Also + -------- + Series.apply : Invoke function on a Series. + Series.transform : Transform function producing a Series with like indexes. 
+ + Notes + ----- + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + A passed user-defined-function will be passed a Series for evaluation. + + If ``func`` defines an index relabeling, ``axis`` must be ``0`` or ``index``. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.agg("min") + 1 + + >>> s.agg(["min", "max"]) + min 1 + max 4 + dtype: int64 + """ + # Validate the axis parameter self._get_axis_number(axis) @@ -4554,14 +4645,147 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): agg = aggregate - @doc( - _shared_docs["transform"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs ) -> DataFrame | Series: + """ + Call ``func`` on self producing a Series with the same axis shape as self. + + Parameters + ---------- + func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a Series or when passed to Series.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, function names or list-like of such + + axis : {0 or 'index'} + Unused. 
Parameter needed for compatibility with DataFrame. + + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + Series + A Series that must have the same length as self. + + Raises + ------ + ValueError : If the returned Series has a different length than self. + + See Also + -------- + Series.agg : Only perform aggregating type operations. + Series.apply : Invoke function on a Series. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({"A": range(3), "B": range(1, 4)}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting Series must have the same length as the + input Series, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + + You can call transform on a GroupBy object: + + >>> df = pd.DataFrame( + ... { + ... "Date": [ + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... ], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... } + ... ) + >>> df + Date Data + 0 2015-05-08 5 + 1 2015-05-07 8 + 2 2015-05-06 6 + 3 2015-05-05 1 + 4 2015-05-08 50 + 5 2015-05-07 100 + 6 2015-05-06 60 + 7 2015-05-05 120 + >>> df.groupby("Date")["Data"].transform("sum") + 0 55 + 1 108 + 2 66 + 3 121 + 4 55 + 5 108 + 6 66 + 7 121 + Name: Data, dtype: int64 + + >>> df = pd.DataFrame( + ... { + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"], + ... } + ... 
) + >>> df + c type + 0 1 m + 1 1 n + 2 1 o + 3 2 m + 4 2 m + 5 2 n + 6 2 n + >>> df["size"] = df.groupby("c")["type"].transform(len) + >>> df + c type size + 0 1 m 3 + 1 1 n 3 + 2 1 o 3 + 3 2 m 4 + 4 2 m 4 + 5 2 n 4 + 6 2 n 4 + """ # Validate axis argument self._get_axis_number(axis) ser = self.copy(deep=False)