"""
 | 
						||
Provide the groupby split-apply-combine paradigm. Define the GroupBy
 | 
						||
class providing the base-class of operations.
 | 
						||
 | 
						||
The SeriesGroupBy and DataFrameGroupBy sub-class
 | 
						||
(defined in pandas.core.groupby.generic)
 | 
						||
expose these user-facing objects to provide specific functionality.
 | 
						||
"""
 | 
						||
from __future__ import annotations
 | 
						||
 | 
						||
from contextlib import contextmanager
 | 
						||
import datetime
 | 
						||
from functools import (
 | 
						||
    partial,
 | 
						||
    wraps,
 | 
						||
)
 | 
						||
import inspect
 | 
						||
from textwrap import dedent
 | 
						||
import types
 | 
						||
from typing import (
 | 
						||
    Callable,
 | 
						||
    Hashable,
 | 
						||
    Iterable,
 | 
						||
    Iterator,
 | 
						||
    List,
 | 
						||
    Literal,
 | 
						||
    Mapping,
 | 
						||
    Sequence,
 | 
						||
    TypeVar,
 | 
						||
    Union,
 | 
						||
    cast,
 | 
						||
    final,
 | 
						||
)
 | 
						||
import warnings
 | 
						||
 | 
						||
import numpy as np
 | 
						||
 | 
						||
from pandas._config.config import option_context
 | 
						||
 | 
						||
from pandas._libs import (
 | 
						||
    Timestamp,
 | 
						||
    lib,
 | 
						||
)
 | 
						||
import pandas._libs.groupby as libgroupby
 | 
						||
from pandas._typing import (
 | 
						||
    ArrayLike,
 | 
						||
    IndexLabel,
 | 
						||
    NDFrameT,
 | 
						||
    PositionalIndexer,
 | 
						||
    RandomState,
 | 
						||
    Scalar,
 | 
						||
    T,
 | 
						||
    npt,
 | 
						||
)
 | 
						||
from pandas.compat.numpy import function as nv
 | 
						||
from pandas.errors import AbstractMethodError
 | 
						||
from pandas.util._decorators import (
 | 
						||
    Appender,
 | 
						||
    Substitution,
 | 
						||
    cache_readonly,
 | 
						||
    doc,
 | 
						||
)
 | 
						||
from pandas.util._exceptions import find_stack_level
 | 
						||
 | 
						||
from pandas.core.dtypes.common import (
 | 
						||
    is_bool_dtype,
 | 
						||
    is_datetime64_dtype,
 | 
						||
    is_float_dtype,
 | 
						||
    is_integer,
 | 
						||
    is_integer_dtype,
 | 
						||
    is_numeric_dtype,
 | 
						||
    is_object_dtype,
 | 
						||
    is_scalar,
 | 
						||
    is_timedelta64_dtype,
 | 
						||
)
 | 
						||
from pandas.core.dtypes.missing import (
 | 
						||
    isna,
 | 
						||
    notna,
 | 
						||
)
 | 
						||
 | 
						||
from pandas.core import nanops
 | 
						||
from pandas.core._numba import executor
 | 
						||
import pandas.core.algorithms as algorithms
 | 
						||
from pandas.core.arrays import (
 | 
						||
    BaseMaskedArray,
 | 
						||
    BooleanArray,
 | 
						||
    Categorical,
 | 
						||
    ExtensionArray,
 | 
						||
)
 | 
						||
from pandas.core.base import (
 | 
						||
    DataError,
 | 
						||
    PandasObject,
 | 
						||
    SelectionMixin,
 | 
						||
)
 | 
						||
import pandas.core.common as com
 | 
						||
from pandas.core.frame import DataFrame
 | 
						||
from pandas.core.generic import NDFrame
 | 
						||
from pandas.core.groupby import (
 | 
						||
    base,
 | 
						||
    numba_,
 | 
						||
    ops,
 | 
						||
)
 | 
						||
from pandas.core.groupby.indexing import (
 | 
						||
    GroupByIndexingMixin,
 | 
						||
    GroupByNthSelector,
 | 
						||
)
 | 
						||
from pandas.core.indexes.api import (
 | 
						||
    CategoricalIndex,
 | 
						||
    Index,
 | 
						||
    MultiIndex,
 | 
						||
)
 | 
						||
from pandas.core.internals.blocks import ensure_block_shape
 | 
						||
import pandas.core.sample as sample
 | 
						||
from pandas.core.series import Series
 | 
						||
from pandas.core.sorting import get_group_index_sorter
 | 
						||
from pandas.core.util.numba_ import (
 | 
						||
    NUMBA_FUNC_CACHE,
 | 
						||
    maybe_use_numba,
 | 
						||
)
 | 
						||
 | 
						||
_common_see_also = """
 | 
						||
        See Also
 | 
						||
        --------
 | 
						||
        Series.%(name)s : Apply a function %(name)s to a Series.
 | 
						||
        DataFrame.%(name)s : Apply a function %(name)s
 | 
						||
            to each row or column of a DataFrame.
 | 
						||
"""

_apply_docs = {
    "template": """
    Apply function ``func`` group-wise and combine the results together.

    The function passed to ``apply`` must take a {input} as its first
    argument and return a DataFrame, Series or scalar. ``apply`` will
    then take care of combining the results back together into a single
    dataframe or series. ``apply`` is therefore a highly flexible
    grouping method.

    While ``apply`` is a very flexible method, its downside is that
    using it can be quite a bit slower than using more specific methods
    like ``agg`` or ``transform``. Pandas offers a wide range of methods that will
    be much faster than using ``apply`` for their specific purposes, so try to
    use them before reaching for ``apply``.

    Parameters
    ----------
    func : callable
        A callable that takes a {input} as its first argument, and
        returns a dataframe, a series or a scalar. In addition, the
        callable may take positional and keyword arguments.
    args, kwargs : tuple and dict
        Optional positional and keyword arguments to pass to ``func``.

    Returns
    -------
    applied : Series or DataFrame

    See Also
    --------
    pipe : Apply function to the full GroupBy object instead of to each
        group.
    aggregate : Apply aggregate function to the GroupBy object.
    transform : Apply function column-by-column to the GroupBy object.
    Series.apply : Apply a function to a Series.
    DataFrame.apply : Apply a function to each row or column of a DataFrame.

    Notes
    -----

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the passed ``func``,
        see the examples below.

    Functions that mutate the passed object can produce unexpected
    behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
    for more details.

    Examples
    --------
    {examples}
    """,
    "dataframe_examples": """
    >>> df = pd.DataFrame({'A': 'a a b'.split(),
    ...                    'B': [1, 2, 3],
    ...                    'C': [4, 6, 5]})
    >>> g = df.groupby('A')

    Notice that ``g`` has two groups, ``a`` and ``b``.
    Calling `apply` in various ways, we can get different grouping results:

    Example 1: below, the function passed to `apply` takes a DataFrame as
    its argument and returns a DataFrame. `apply` combines the result for
    each group together into a new DataFrame:

    >>> g[['B', 'C']].apply(lambda x: x / x.sum())
              B    C
    0  0.333333  0.4
    1  0.666667  0.6
    2  1.000000  1.0

    Example 2: The function passed to `apply` takes a DataFrame as
    its argument and returns a Series.  `apply` combines the result for
    each group together into a new DataFrame.

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the passed ``func``.

    >>> g[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min())
         B    C
    A
    a  1.0  2.0
    b  0.0  0.0

    Example 3: The function passed to `apply` takes a DataFrame as
    its argument and returns a scalar. `apply` combines the result for
    each group together into a Series, including setting the index as
    appropriate:

    >>> g.apply(lambda x: x.C.max() - x.B.min())
    A
    a    5
    b    2
    dtype: int64""",
    "series_examples": """
    >>> s = pd.Series([0, 1, 2], index='a a b'.split())
    >>> g = s.groupby(s.index)

    From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``.
    Calling `apply` in various ways, we can get different grouping results:

    Example 1: The function passed to `apply` takes a Series as
    its argument and returns a Series.  `apply` combines the result for
    each group together into a new Series.

    .. versionchanged:: 1.3.0

        The resulting dtype will reflect the return value of the passed ``func``.

    >>> g.apply(lambda x: x * 2 if x.name == 'a' else x / 2)
    a    0.0
    a    2.0
    b    1.0
    dtype: float64

    Example 2: The function passed to `apply` takes a Series as
    its argument and returns a scalar. `apply` combines the result for
    each group together into a Series, including setting the index as
    appropriate:

    >>> g.apply(lambda x: x.max() - x.min())
    a    1
    b    0
    dtype: int64""",
}

_groupby_agg_method_template = """
Compute {fname} of group values.

Parameters
----------
numeric_only : bool, default {no}
    Include only float, int, boolean columns. If None, will attempt to use
    everything, then use only numeric data.
min_count : int, default {mc}
    The required number of valid values to perform the operation. If fewer
    than ``min_count`` non-NA values are present the result will be NA.

Returns
-------
Series or DataFrame
    Computed {fname} of values within each group.
"""

_pipe_template = """
Apply a function `func` with arguments to this %(klass)s object and return
the function's result.

Use `.pipe` when you want to improve readability by chaining together
functions that expect Series, DataFrames, GroupBy or Resampler objects.
Instead of writing

>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)  # doctest: +SKIP

you can write

>>> (df.groupby('group')
...    .pipe(f)
...    .pipe(g, arg1=a)
...    .pipe(h, arg2=b, arg3=c))  # doctest: +SKIP

which is much more readable.

Parameters
----------
func : callable or tuple of (callable, str)
    Function to apply to this %(klass)s object or, alternatively,
    a `(callable, data_keyword)` tuple where `data_keyword` is a
    string indicating the keyword of `callable` that expects the
    %(klass)s object.
args : iterable, optional
    Positional arguments passed into `func`.
kwargs : dict, optional
    A dictionary of keyword arguments passed into `func`.

Returns
-------
object : the return type of `func`.

See Also
--------
Series.pipe : Apply a function with arguments to a Series.
DataFrame.pipe : Apply a function with arguments to a DataFrame.
apply : Apply function to each group instead of to the
    full %(klass)s object.

Notes
-----
See more `here
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_

Examples
--------
%(examples)s
"""

_transform_template = """
Call function producing a like-indexed %(klass)s on each group and
return a %(klass)s having the same indexes as the original object
filled with the transformed values.

Parameters
----------
f : function
    Function to apply to each group.

    Can also accept a Numba JIT function with
    ``engine='numba'`` specified.

    If the ``'numba'`` engine is chosen, the function must be
    a user defined function with ``values`` and ``index`` as the
    first and second arguments respectively in the function signature.
    Each group's index will be passed to the user defined function
    and optionally available for use.

    .. versionchanged:: 1.1.0
*args
    Positional arguments to pass to func.
engine : str, default None
    * ``'cython'`` : Runs the function through C-extensions from cython.
    * ``'numba'`` : Runs the function through JIT compiled code from numba.
    * ``None`` : Defaults to ``'cython'`` or the global setting ``compute.use_numba``

    .. versionadded:: 1.1.0
engine_kwargs : dict, default None
    * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
    * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
      and ``parallel`` dictionary keys. The values must either be ``True`` or
      ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
      ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
      applied to the function

    .. versionadded:: 1.1.0
**kwargs
    Keyword arguments to be passed into func.

Returns
-------
%(klass)s

See Also
--------
%(klass)s.groupby.apply : Apply function ``func`` group-wise and combine
    the results together.
%(klass)s.groupby.aggregate : Aggregate using one or more
    operations over the specified axis.
%(klass)s.transform : Call ``func`` on self producing a %(klass)s with the
    same axis shape as self.

Notes
-----
Each group is endowed with the attribute 'name' in case you need to know
which group you are working on.

The current implementation imposes three requirements on f:

* f must return a value that either has the same shape as the input
  subframe or can be broadcast to the shape of the input subframe.
  For example, if `f` returns a scalar it will be broadcast to have the
  same shape as the input subframe.
* if this is a DataFrame, f must support application column-by-column
  in the subframe. If f also supports application to the entire subframe,
  then a fast path is used starting from the second chunk.
* f must not mutate groups. Mutation is not supported and may
  produce unexpected results. See :ref:`gotchas.udf-mutation` for more details.

When using ``engine='numba'``, there will be no "fall back" behavior internally.
The group data and group index will be passed as numpy arrays to the JITed
user defined function, and no alternative execution attempts will be tried.

.. versionchanged:: 1.3.0

    The resulting dtype will reflect the return value of the passed ``func``,
    see the examples below.

Examples
--------

>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
...                           'foo', 'bar'],
...                    'B' : ['one', 'one', 'two', 'three',
...                           'two', 'two'],
...                    'C' : [1, 5, 5, 2, 5, 5],
...                    'D' : [2.0, 5., 8., 1., 2., 9.]})
>>> grouped = df.groupby('A')
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())
          C         D
0 -1.154701 -0.577350
1  0.577350  0.000000
2  0.577350  1.154701
3 -1.154701 -1.000000
4  0.577350 -0.577350
5  0.577350  1.000000

Broadcast result of the transformation

>>> grouped.transform(lambda x: x.max() - x.min())
   C    D
0  4  6.0
1  3  8.0
2  4  6.0
3  3  8.0
4  4  6.0
5  3  8.0

.. versionchanged:: 1.3.0

    The resulting dtype will reflect the return value of the passed ``func``,
    for example:

>>> grouped[['C', 'D']].transform(lambda x: x.astype(int).max())
   C  D
0  5  8
1  5  9
2  5  8
3  5  9
4  5  8
5  5  9
"""

_agg_template = """
Aggregate using one or more operations over the specified axis.

Parameters
----------
func : function, str, list or dict
    Function to use for aggregating the data. If a function, must either
    work when passed a {klass} or when passed to {klass}.apply.

    Accepted combinations are:

    - function
    - string function name
    - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
    - dict of axis labels -> functions, function names or list of such.

    Can also accept a Numba JIT function with
    ``engine='numba'`` specified. Only passing a single function is supported
    with this engine.

    If the ``'numba'`` engine is chosen, the function must be
    a user defined function with ``values`` and ``index`` as the
    first and second arguments respectively in the function signature.
    Each group's index will be passed to the user defined function
    and optionally available for use.

    .. versionchanged:: 1.1.0
*args
    Positional arguments to pass to func.
engine : str, default None
    * ``'cython'`` : Runs the function through C-extensions from cython.
    * ``'numba'`` : Runs the function through JIT compiled code from numba.
    * ``None`` : Defaults to ``'cython'`` or the global setting ``compute.use_numba``

    .. versionadded:: 1.1.0
engine_kwargs : dict, default None
    * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
    * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
      and ``parallel`` dictionary keys. The values must either be ``True`` or
      ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
      ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be
      applied to the function

    .. versionadded:: 1.1.0
**kwargs
    Keyword arguments to be passed into func.

Returns
-------
{klass}

See Also
--------
{klass}.groupby.apply : Apply function func group-wise
    and combine the results together.
{klass}.groupby.transform : Transforms the Series on each group
    based on the given function.
{klass}.aggregate : Aggregate using one or more
    operations over the specified axis.

Notes
-----
When using ``engine='numba'``, there will be no "fall back" behavior internally.
The group data and group index will be passed as numpy arrays to the JITed
user defined function, and no alternative execution attempts will be tried.

Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
for more details.

.. versionchanged:: 1.3.0

    The resulting dtype will reflect the return value of the passed ``func``,
    see the examples below.
{examples}"""


@final
class GroupByPlot(PandasObject):
    """
    Class implementing the .plot attribute for groupby objects.
    """

    def __init__(self, groupby: GroupBy):
        self._groupby = groupby

    def __call__(self, *args, **kwargs):
        def f(self):
            return self.plot(*args, **kwargs)

        f.__name__ = "plot"
        return self._groupby.apply(f)

    def __getattr__(self, name: str):
        def attr(*args, **kwargs):
            def f(self):
                return getattr(self.plot, name)(*args, **kwargs)

            return self._groupby.apply(f)

        return attr
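
# Illustrative sketch (not part of the class above): attribute access on
# GroupByPlot dispatches plotting per group through ``apply``, so the two
# hypothetical calls below are roughly equivalent:
#
#     df.groupby("key").plot.hist()
#     df.groupby("key").apply(lambda g: g.plot.hist())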


_KeysArgType = Union[
    Hashable,
    List[Hashable],
    Callable[[Hashable], Hashable],
    List[Callable[[Hashable], Hashable]],
    Mapping[Hashable, Hashable],
]
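
# A sketch of the accepted key forms (hypothetical ``df`` and labels), one per
# member of the Union above:
#
#     df.groupby("a")                      # Hashable
#     df.groupby(["a", "b"])               # List[Hashable]
#     df.groupby(len)                      # Callable[[Hashable], Hashable]
#     df.groupby([len, str.lower])         # List[Callable[[Hashable], Hashable]]
#     df.groupby({"x": "g1", "y": "g2"})   # Mapping[Hashable, Hashable]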


class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
    _group_selection: IndexLabel | None = None
    _apply_allowlist: frozenset[str] = frozenset()
    _hidden_attrs = PandasObject._hidden_attrs | {
        "as_index",
        "axis",
        "dropna",
        "exclusions",
        "grouper",
        "group_keys",
        "keys",
        "level",
        "mutated",
        "obj",
        "observed",
        "sort",
        "squeeze",
    }

    axis: int
    grouper: ops.BaseGrouper
    group_keys: bool

    @final
    def __len__(self) -> int:
        return len(self.groups)

    @final
    def __repr__(self) -> str:
        # TODO: Better repr for GroupBy object
        return object.__repr__(self)

    @final
    @property
    def groups(self) -> dict[Hashable, np.ndarray]:
        """
        Dict {group name -> group labels}.
        """
        return self.grouper.groups

    @final
    @property
    def ngroups(self) -> int:
        return self.grouper.ngroups

    @final
    @property
    def indices(self):
        """
        Dict {group name -> group indices}.
        """
        return self.grouper.indices
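
    # Illustrative sketch (not part of the property): ``groups`` maps each
    # group name to the row *labels*, while ``indices`` maps it to integer
    # *positions*. For a hypothetical ``s = pd.Series([1, 2, 3],
    # index=["a", "a", "b"])``, shown schematically:
    #
    #     s.groupby(level=0).groups   # {'a': ['a', 'a'], 'b': ['b']}
    #     s.groupby(level=0).indices  # {'a': array([0, 1]), 'b': array([2])}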

    @final
    def _get_indices(self, names):
        """
        Safely get multiple indices, translating datelike keys to the
        underlying representation.
        """

        def get_converter(s):
            # possibly convert to the actual key types
            # in the indices, could be a Timestamp or a np.datetime64
            if isinstance(s, datetime.datetime):
                return lambda key: Timestamp(key)
            elif isinstance(s, np.datetime64):
                return lambda key: Timestamp(key).asm8
            else:
                return lambda key: key

        if len(names) == 0:
            return []

        if len(self.indices) > 0:
            index_sample = next(iter(self.indices))
        else:
            index_sample = None  # Dummy sample

        name_sample = names[0]
        if isinstance(index_sample, tuple):
            if not isinstance(name_sample, tuple):
                msg = "must supply a tuple to get_group with multiple grouping keys"
                raise ValueError(msg)
            if not len(name_sample) == len(index_sample):
                try:
                    # If the original grouper was a tuple
                    return [self.indices[name] for name in names]
                except KeyError as err:
                    # turns out it wasn't a tuple
                    msg = (
                        "must supply a same-length tuple to get_group "
                        "with multiple grouping keys"
                    )
                    raise ValueError(msg) from err

            converters = [get_converter(s) for s in index_sample]
            names = (tuple(f(n) for f, n in zip(converters, name)) for name in names)

        else:
            converter = get_converter(index_sample)
            names = (converter(name) for name in names)

        return [self.indices.get(name, []) for name in names]
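
    # Illustrative sketch (assumption): when group keys are np.datetime64
    # values, a datetime.datetime lookup key is normalized before probing
    # ``self.indices``, roughly:
    #
    #     convert = get_converter(np.datetime64("2021-01-01"))
    #     convert(datetime.datetime(2021, 1, 1))  # -> np.datetime64 key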

    @final
    def _get_index(self, name):
        """
        Safely get a single index, translating a datelike key to the
        underlying representation.
        """
        return self._get_indices([name])[0]

    @final
    @cache_readonly
    def _selected_obj(self):
        # Note: _selected_obj is always just `self.obj` for SeriesGroupBy

        if self._selection is None or isinstance(self.obj, Series):
            if self._group_selection is not None:
                return self.obj[self._group_selection]
            return self.obj
        else:
            return self.obj[self._selection]

    @final
    def _dir_additions(self) -> set[str]:
        return self.obj._dir_additions() | self._apply_allowlist

    @Substitution(
        klass="GroupBy",
        examples=dedent(
            """\
        >>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]})
        >>> df
           A  B
        0  a  1
        1  b  2
        2  a  3
        3  b  4

        To get the difference between each group's maximum and minimum value in one
        pass, you can do

        >>> df.groupby('A').pipe(lambda x: x.max() - x.min())
           B
        A
        a  2
        b  2"""
        ),
    )
    @Appender(_pipe_template)
    def pipe(
        self,
        func: Callable[..., T] | tuple[Callable[..., T], str],
        *args,
        **kwargs,
    ) -> T:
        return com.pipe(self, func, *args, **kwargs)

    plot = property(GroupByPlot)

    @final
    def get_group(self, name, obj=None) -> DataFrame | Series:
        """
        Construct DataFrame from group with provided name.

        Parameters
        ----------
        name : object
            The name of the group to get as a DataFrame.
        obj : DataFrame, default None
            The DataFrame to take the group from. If
            it is None, the object groupby was called on will
            be used.

        Returns
        -------
        group : same type as obj
        """
        if obj is None:
            obj = self._selected_obj

        inds = self._get_index(name)
        if not len(inds):
            raise KeyError(name)

        return obj._take_with_is_copy(inds, axis=self.axis)
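
    # Illustrative sketch (not part of the method): for a hypothetical
    # ``df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})``,
    # ``df.groupby("A").get_group("x")`` returns the two rows keyed by "x",
    # while an unknown name raises ``KeyError``.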

    @final
    def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]:
        """
        Groupby iterator.

        Returns
        -------
        Generator yielding sequence of (name, subsetted object)
        for each group
        """
        return self.grouper.get_iterator(self._selected_obj, axis=self.axis)
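
    # Illustrative sketch (not part of the method): iteration chunks the data
    # by group; with the hypothetical ``df`` above,
    #
    #     for name, group in df.groupby("A"):
    #         ...  # name is "x" then "y"; group is the matching sub-frame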


# To track operations that expand dimensions, like ohlc
OutputFrameOrSeries = TypeVar("OutputFrameOrSeries", bound=NDFrame)


class GroupBy(BaseGroupBy[NDFrameT]):
    """
    Class for grouping and aggregating relational data.

    See the aggregate, transform, and apply functions on this object.

    It's easiest to use obj.groupby(...) to construct a GroupBy object, but
    you can also do:

    ::

        grouped = groupby(obj, ...)

    Parameters
    ----------
    obj : pandas object
    axis : int, default 0
    level : int, default None
        Level of MultiIndex
    groupings : list of Grouping objects
        Most users should ignore this
    exclusions : array-like, optional
        List of columns to exclude
    name : str
        Most users should ignore this

    Returns
    -------
    **Attributes**
    groups : dict
        {group name -> group labels}
    len(grouped) : int
        Number of groups

    Notes
    -----
    After grouping, see the aggregate, apply, and transform functions. Here are
    some other brief notes about usage. When grouping by multiple groups, the
    result index will be a MultiIndex (hierarchical) by default.

    Iteration produces (key, group) tuples, i.e. chunking the data by group. So
    you can write code like:

    ::

        grouped = obj.groupby(keys, axis=axis)
        for key, group in grouped:
            # do something with the data

    Function calls on GroupBy, if not specially implemented, "dispatch" to the
    grouped data. So if you group a DataFrame and wish to invoke the std()
    method on each group, you can simply do:

    ::

        df.groupby(mapper).std()

    rather than

    ::

        df.groupby(mapper).aggregate(np.std)

    You can pass arguments to these "wrapped" functions, too.

    See the online documentation for a full exposition on these topics and
    much more.
    """

    grouper: ops.BaseGrouper
    as_index: bool

    @final
    def __init__(
        self,
        obj: NDFrameT,
        keys: _KeysArgType | None = None,
        axis: int = 0,
        level: IndexLabel | None = None,
        grouper: ops.BaseGrouper | None = None,
        exclusions: frozenset[Hashable] | None = None,
        selection: IndexLabel | None = None,
        as_index: bool = True,
        sort: bool = True,
        group_keys: bool = True,
        squeeze: bool = False,
        observed: bool = False,
        mutated: bool = False,
        dropna: bool = True,
    ):

        self._selection = selection

        assert isinstance(obj, NDFrame), type(obj)

        self.level = level

        if not as_index:
            if not isinstance(obj, DataFrame):
                raise TypeError("as_index=False only valid with DataFrame")
            if axis != 0:
                raise ValueError("as_index=False only valid for axis=0")

        self.as_index = as_index
        self.keys = keys
        self.sort = sort
        self.group_keys = group_keys
        self.squeeze = squeeze
        self.observed = observed
        self.mutated = mutated
        self.dropna = dropna

        if grouper is None:
            from pandas.core.groupby.grouper import get_grouper

            grouper, exclusions, obj = get_grouper(
                obj,
                keys,
                axis=axis,
                level=level,
                sort=sort,
                observed=observed,
                mutated=self.mutated,
                dropna=self.dropna,
            )

        self.obj = obj
        self.axis = obj._get_axis_number(axis)
        self.grouper = grouper
        self.exclusions = frozenset(exclusions) if exclusions else frozenset()

    def __getattr__(self, attr: str):
        if attr in self._internal_names_set:
            return object.__getattribute__(self, attr)
        if attr in self.obj:
            return self[attr]

        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{attr}'"
        )

    def __getattribute__(self, attr: str):
        # Intercept nth to allow both call and index
        if attr == "nth":
            return GroupByNthSelector(self)
        elif attr == "nth_actual":
            return super().__getattribute__("nth")
        else:
            return super().__getattribute__(attr)

    @final
    def _make_wrapper(self, name: str) -> Callable:
        assert name in self._apply_allowlist

        with self._group_selection_context():
            # need to set up the selection,
            # as these are not passed directly but via the grouper
            f = getattr(self._obj_with_exclusions, name)
            if not isinstance(f, types.MethodType):
                return self.apply(lambda self: getattr(self, name))

        f = getattr(type(self._obj_with_exclusions), name)
        sig = inspect.signature(f)

        def wrapper(*args, **kwargs):
            # a little trickery for aggregation functions that need an axis
            # argument
            if "axis" in sig.parameters:
                if kwargs.get("axis", None) is None:
                    kwargs["axis"] = self.axis

            def curried(x):
                return f(x, *args, **kwargs)

            # preserve the name so we can detect it when calling plot methods,
            # to avoid duplicates
            curried.__name__ = name

            # special case otherwise extra plots are created when catching the
            # exception below
            if name in base.plotting_methods:
                return self.apply(curried)

            return self._python_apply_general(curried, self._obj_with_exclusions)

        wrapper.__name__ = name
        return wrapper

    # -----------------------------------------------------------------
    # Selection

    @final
    def _set_group_selection(self) -> None:
        """
        Create group based selection.

        Used when selection is not passed directly but instead via a grouper.

        NOTE: this should be paired with a call to _reset_group_selection
        """
        # This is a no-op for SeriesGroupBy
        grp = self.grouper
        if not (
            self.as_index
            and grp.groupings is not None
            and self.obj.ndim > 1
            and self._group_selection is None
        ):
            return

        groupers = [g.name for g in grp.groupings if g.level is None and g.in_axis]

        if len(groupers):
            # GH12839 clear selected obj cache when group selection changes
            ax = self.obj._info_axis
            self._group_selection = ax.difference(Index(groupers), sort=False).tolist()
            self._reset_cache("_selected_obj")

    @final
    def _reset_group_selection(self) -> None:
        """
        Clear group based selection.

        Used for methods needing to return info on each group regardless of
        whether a group selection was previously set.
        """
        if self._group_selection is not None:
            # GH12839 clear cached selection too when changing group selection
            self._group_selection = None
            self._reset_cache("_selected_obj")

    @contextmanager
    def _group_selection_context(self) -> Iterator[GroupBy]:
        """
        Set / reset the _group_selection_context.
        """
        self._set_group_selection()
        try:
            yield self
        finally:
            self._reset_group_selection()
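
    # Illustrative sketch (not part of the method): internal callers wrap work
    # in this context manager so grouping columns are excluded from
    # ``_selected_obj`` only for the duration of the block, e.g.
    #
    #     with self._group_selection_context():
    #         data = self._selected_obj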

    def _iterate_slices(self) -> Iterable[Series]:
        raise AbstractMethodError(self)

    # -----------------------------------------------------------------
    # Dispatch/Wrapping

    @final
    def _concat_objects(self, values, not_indexed_same: bool = False):
        from pandas.core.reshape.concat import concat

        def reset_identity(values):
            # reset the identities of the components
            # of the values to prevent aliasing
            for v in com.not_none(*values):
                ax = v._get_axis(self.axis)
                ax._reset_identity()
            return values

        if not not_indexed_same:
            result = concat(values, axis=self.axis)

            ax = self._selected_obj._get_axis(self.axis)
            if self.dropna:
                labels = self.grouper.group_info[0]
                mask = labels != -1
                ax = ax[mask]

            # this is a very unfortunate situation
            # we can't use reindex to restore the original order
            # when the ax has duplicates
            # so we resort to this
            # GH 14776, 30667
            if ax.has_duplicates and not result.axes[self.axis].equals(ax):
                indexer, _ = result.index.get_indexer_non_unique(ax._values)
                indexer = algorithms.unique1d(indexer)
                result = result.take(indexer, axis=self.axis)
            else:
                result = result.reindex(ax, axis=self.axis, copy=False)

        elif self.group_keys:

            values = reset_identity(values)
            if self.as_index:

                # possible MI return case
                group_keys = self.grouper.result_index
                group_levels = self.grouper.levels
                group_names = self.grouper.names

                result = concat(
                    values,
                    axis=self.axis,
                    keys=group_keys,
                    levels=group_levels,
                    names=group_names,
                    sort=False,
                )
            else:

                # GH5610, returns a MI, with the first level being a
                # range index
                keys = list(range(len(values)))
                result = concat(values, axis=self.axis, keys=keys)
        else:
            values = reset_identity(values)
            result = concat(values, axis=self.axis)

        name = self.obj.name if self.obj.ndim == 1 else self._selection
        if isinstance(result, Series) and name is not None:

            result.name = name

        return result

    @final
    def _set_result_index_ordered(
        self, result: OutputFrameOrSeries
    ) -> OutputFrameOrSeries:
        # set the result index on the passed values object and
        # return the new object, xref 8046

        if self.grouper.is_monotonic:
            # shortcut if we have an already ordered grouper
            result.set_axis(self.obj._get_axis(self.axis), axis=self.axis, inplace=True)
            return result

        # row order is scrambled => sort the rows by position in original index
        original_positions = Index(
            np.concatenate(self._get_indices(self.grouper.result_index))
        )
        result.set_axis(original_positions, axis=self.axis, inplace=True)
        result = result.sort_index(axis=self.axis)

        dropped_rows = len(result.index) < len(self.obj.index)

        if dropped_rows:
            # get index by slicing original index according to original positions
            # slice drops attrs => use set_axis when no rows were dropped
            sorted_indexer = result.index
            result.index = self._selected_obj.index[sorted_indexer]
        else:
            result.set_axis(self.obj._get_axis(self.axis), axis=self.axis, inplace=True)

        return result

    def _indexed_output_to_ndframe(
        self, result: Mapping[base.OutputKey, ArrayLike]
    ) -> Series | DataFrame:
        raise AbstractMethodError(self)

    @final
    def _wrap_aggregated_output(
        self,
        output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike],
        qs: npt.NDArray[np.float64] | None = None,
    ):
        """
        Wraps the output of GroupBy aggregations into the expected result.

        Parameters
        ----------
        output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike]
            Data to wrap.

        Returns
        -------
        Series or DataFrame
        """

        if isinstance(output, (Series, DataFrame)):
            # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce,
            #  in which case our columns are already set correctly.
            # ATM we do not get here for SeriesGroupBy; when we do, we will
            #  need to require that result.name already match self.obj.name
            result = output
        else:
            result = self._indexed_output_to_ndframe(output)

        if not self.as_index:
            # `not self.as_index` is only relevant for DataFrameGroupBy,
            #   enforced in __init__
            self._insert_inaxis_grouper_inplace(result)
            result = result._consolidate()
            index = Index(range(self.grouper.ngroups))

        else:
            index = self.grouper.result_index

        if qs is not None:
            # We get here with len(qs) != 1 and not self.as_index
            #  in test_pass_args_kwargs
            index = _insert_quantile_level(index, qs)

        result.index = index

        if self.axis == 1:
            # Only relevant for DataFrameGroupBy, no-op for SeriesGroupBy
            result = result.T
            if result.index.equals(self.obj.index):
                # Retain e.g. DatetimeIndex/TimedeltaIndex freq
                result.index = self.obj.index.copy()
                # TODO: Do this more systematically

        return self._reindex_output(result, qs=qs)

    @final
    def _wrap_transformed_output(
        self, output: Mapping[base.OutputKey, ArrayLike]
    ) -> Series | DataFrame:
        """
        Wraps the output of GroupBy transformations into the expected result.

        Parameters
        ----------
        output : Mapping[base.OutputKey, ArrayLike]
            Data to wrap.

        Returns
        -------
        Series or DataFrame
            Series for SeriesGroupBy, DataFrame for DataFrameGroupBy
        """
        if isinstance(output, (Series, DataFrame)):
            result = output
        else:
            result = self._indexed_output_to_ndframe(output)

        if self.axis == 1:
            # Only relevant for DataFrameGroupBy
            result = result.T
            result.columns = self.obj.columns

        result.index = self.obj.index
        return result

    def _wrap_applied_output(self, data, values: list, not_indexed_same: bool = False):
        raise AbstractMethodError(self)

    def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
        """
        Determine subclass-specific default value for 'numeric_only'.

        For SeriesGroupBy we want the default to be False (to match Series behavior).
        For DataFrameGroupBy we want it to be True (for backwards-compat).

        Parameters
        ----------
        numeric_only : bool or lib.no_default

        Returns
        -------
        bool
        """
        # GH#41291
        if numeric_only is lib.no_default:
            # i.e. not explicitly passed by user
            if self.obj.ndim == 2:
                # i.e. DataFrameGroupBy
                numeric_only = True
                # GH#42395 GH#43108 GH#43154
                # Regression from 1.2.5 to 1.3 caused object columns to be dropped
                if self.axis:
                    obj = self._obj_with_exclusions.T
                else:
                    obj = self._obj_with_exclusions
                check = obj._get_numeric_data()
                if len(obj.columns) and not len(check.columns) and not obj.empty:
                    numeric_only = False
                    # TODO: v1.4+ Add FutureWarning

            else:
                numeric_only = False

        # error: Incompatible return value type (got "Union[bool, NoDefault]",
        # expected "bool")
        return numeric_only  # type: ignore[return-value]

    # -----------------------------------------------------------------
    # numba

    @final
    def _numba_prep(self, func, data):
        if not callable(func):
            raise NotImplementedError(
                "Numba engine can only be used with a single function."
            )
        ids, _, ngroups = self.grouper.group_info
        sorted_index = get_group_index_sorter(ids, ngroups)
        sorted_ids = algorithms.take_nd(ids, sorted_index, allow_fill=False)

        sorted_data = data.take(sorted_index, axis=self.axis).to_numpy()
        if len(self.grouper.groupings) > 1:
            raise NotImplementedError(
                "More than 1 grouping label is not supported with engine='numba'"
            )
        # GH 46867
        index_data = data.index
        if isinstance(index_data, MultiIndex):
            group_key = self.grouper.groupings[0].name
            index_data = index_data.get_level_values(group_key)
        sorted_index_data = index_data.take(sorted_index).to_numpy()

        starts, ends = lib.generate_slices(sorted_ids, ngroups)
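        # Illustrative sketch (not part of the computation): for
        # sorted_ids = [0, 0, 1, 1, 1] and ngroups = 2, this yields
        # starts = [0, 2] and ends = [2, 5], so group i occupies
        # sorted_data[starts[i]:ends[i]].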
 | 
						||
        return (
 | 
						||
            starts,
 | 
						||
            ends,
 | 
						||
            sorted_index_data,
 | 
						||
            sorted_data,
 | 
						||
        )
 | 
						||
 | 
						||
    def _numba_agg_general(
 | 
						||
        self,
 | 
						||
        func: Callable,
 | 
						||
        engine_kwargs: dict[str, bool] | None,
 | 
						||
        numba_cache_key_str: str,
 | 
						||
        *aggregator_args,
 | 
						||
    ):
 | 
						||
        """
 | 
						||
        Perform groupby with a standard numerical aggregation function (e.g. mean)
 | 
						||
        with Numba.
 | 
						||
        """
 | 
						||
        if not self.as_index:
 | 
						||
            raise NotImplementedError(
 | 
						||
                "as_index=False is not supported. Use .reset_index() instead."
 | 
						||
            )
 | 
						||
        if self.axis == 1:
 | 
						||
            raise NotImplementedError("axis=1 is not supported.")
 | 
						||
 | 
						||
        with self._group_selection_context():
 | 
						||
            data = self._selected_obj
 | 
						||
        df = data if data.ndim == 2 else data.to_frame()
 | 
						||
        starts, ends, sorted_index, sorted_data = self._numba_prep(func, df)
 | 
						||
        aggregator = executor.generate_shared_aggregator(
 | 
						||
            func, engine_kwargs, numba_cache_key_str
 | 
						||
        )
 | 
						||
        result = aggregator(sorted_data, starts, ends, 0, *aggregator_args)
 | 
						||
 | 
						||
        cache_key = (func, numba_cache_key_str)
 | 
						||
        if cache_key not in NUMBA_FUNC_CACHE:
 | 
						||
            NUMBA_FUNC_CACHE[cache_key] = aggregator
 | 
						||
 | 
						||
        index = self.grouper.result_index
 | 
						||
        if data.ndim == 1:
 | 
						||
            result_kwargs = {"name": data.name}
 | 
						||
            result = result.ravel()
 | 
						||
        else:
 | 
						||
            result_kwargs = {"columns": data.columns}
 | 
						||
        return data._constructor(result, index=index, **result_kwargs)
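
    # The numba path above is what user-facing reductions dispatch to when
    # ``engine="numba"`` is requested.  An illustrative sketch (assumes the
    # optional numba dependency is installed; not executed here):
    #
    #     df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1.0, 2.0, 3.0]})
    #     df.groupby("key").mean(engine="numba")
    #     df.groupby("key").var(
    #         engine="numba", engine_kwargs={"nopython": True, "parallel": False}
    #     )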

    @final
    def _transform_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs):
        """
        Perform groupby transform routine with the numba engine.

        This routine mimics the data splitting routine of the DataSplitter class
        to generate the indices of each group in the sorted data and then passes the
        data and indices into a Numba jitted function.
        """
        starts, ends, sorted_index, sorted_data = self._numba_prep(func, data)

        numba_transform_func = numba_.generate_numba_transform_func(
            kwargs, func, engine_kwargs
        )
        result = numba_transform_func(
            sorted_data,
            sorted_index,
            starts,
            ends,
            len(data.columns),
            *args,
        )

        cache_key = (func, "groupby_transform")
        if cache_key not in NUMBA_FUNC_CACHE:
            NUMBA_FUNC_CACHE[cache_key] = numba_transform_func

        # result values need to be resorted to their original positions since we
        # evaluated the data sorted by group
        return result.take(np.argsort(sorted_index), axis=0)

    @final
    def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs):
        """
        Perform groupby aggregation routine with the numba engine.

        This routine mimics the data splitting routine of the DataSplitter class
        to generate the indices of each group in the sorted data and then passes the
        data and indices into a Numba jitted function.
        """
        starts, ends, sorted_index, sorted_data = self._numba_prep(func, data)

        numba_agg_func = numba_.generate_numba_agg_func(kwargs, func, engine_kwargs)
        result = numba_agg_func(
            sorted_data,
            sorted_index,
            starts,
            ends,
            len(data.columns),
            *args,
        )

        cache_key = (func, "groupby_agg")
        if cache_key not in NUMBA_FUNC_CACHE:
            NUMBA_FUNC_CACHE[cache_key] = numba_agg_func

        return result

    # -----------------------------------------------------------------
    # apply/agg/transform

    @Appender(
        _apply_docs["template"].format(
            input="dataframe", examples=_apply_docs["dataframe_examples"]
        )
    )
    def apply(self, func, *args, **kwargs):

        func = com.is_builtin_func(func)

        # this is needed so we don't try to wrap strings. If we could
        # resolve functions to their callable functions prior, this
        # wouldn't be needed
        if args or kwargs:
            if callable(func):

                @wraps(func)
                def f(g):
                    with np.errstate(all="ignore"):
                        return func(g, *args, **kwargs)

            elif hasattr(nanops, "nan" + func):
                # TODO: should we wrap this in to e.g. _is_builtin_func?
                f = getattr(nanops, "nan" + func)

            else:
                raise ValueError(
                    "func must be a callable if args or kwargs are supplied"
                )
        elif isinstance(func, str):
            if hasattr(self, func):
                res = getattr(self, func)
                if callable(res):
                    return res()
                return res

            else:
                raise TypeError(f"apply func should be callable, not '{func}'")
        else:

            f = func

        # ignore SettingWithCopy here in case the user mutates
        with option_context("mode.chained_assignment", None):
            try:
                result = self._python_apply_general(f, self._selected_obj)
            except TypeError:
                # gh-20949
                # try again, with .apply acting as a filtering
                # operation, by excluding the grouping column
                # This would normally not be triggered
                # except if the udf is trying an operation that
                # fails on *some* columns, e.g. a numeric operation
                # on a string grouper column

                with self._group_selection_context():
                    return self._python_apply_general(f, self._selected_obj)

        return result

    @final
    def _python_apply_general(
        self,
        f: Callable,
        data: DataFrame | Series,
        not_indexed_same: bool | None = None,
    ) -> DataFrame | Series:
        """
        Apply function f in python space

        Parameters
        ----------
        f : callable
            Function to apply
        data : Series or DataFrame
            Data to apply f to
        not_indexed_same: bool, optional
            When specified, overrides the value of not_indexed_same. Apply behaves
            differently when the result index is equal to the input index, but
            this can be coincidental, leading to value-dependent behavior.

        Returns
        -------
        Series or DataFrame
            data after applying f
        """
        values, mutated = self.grouper.apply(f, data, self.axis)

        if not_indexed_same is None:
            not_indexed_same = mutated or self.mutated

        return self._wrap_applied_output(
            data, values, not_indexed_same=not_indexed_same
        )

    @final
    def _python_agg_general(self, func, *args, **kwargs):
        func = com.is_builtin_func(func)
        f = lambda x: func(x, *args, **kwargs)

        # iterate through "columns" (excluding any exclusions) to populate
        # the output dict
        output: dict[base.OutputKey, ArrayLike] = {}

        if self.ngroups == 0:
            # agg_series below assumes ngroups > 0
            return self._python_apply_general(f, self._selected_obj)

        for idx, obj in enumerate(self._iterate_slices()):
            name = obj.name

            try:
                # if this function is invalid for this dtype, we will ignore it.
                result = self.grouper.agg_series(obj, f)
            except TypeError:
                warn_dropping_nuisance_columns_deprecated(type(self), "agg")
                continue

            key = base.OutputKey(label=name, position=idx)
            output[key] = result

        if not output:
            return self._python_apply_general(f, self._selected_obj)

        return self._wrap_aggregated_output(output)

    @final
    def _agg_general(
        self,
        numeric_only: bool = True,
        min_count: int = -1,
        *,
        alias: str,
        npfunc: Callable,
    ):

        with self._group_selection_context():
            # try a cython aggregation if we can
            result = self._cython_agg_general(
                how=alias,
                alt=npfunc,
                numeric_only=numeric_only,
                min_count=min_count,
            )
            return result.__finalize__(self.obj, method="groupby")

    def _agg_py_fallback(
        self, values: ArrayLike, ndim: int, alt: Callable
    ) -> ArrayLike:
        """
        Fallback to pure-python aggregation if _cython_operation raises
        NotImplementedError.
        """
        # We get here with a) EADtypes and b) object dtype

        if values.ndim == 1:
            # For DataFrameGroupBy we only get here with ExtensionArray
            ser = Series(values)
        else:
            # We only get here with values.dtype == object
            # TODO: special case not needed with ArrayManager
            df = DataFrame(values.T)
            # bc we split object blocks in grouped_reduce, we have only 1 col
            # otherwise we'd have to worry about block-splitting GH#39329
            assert df.shape[1] == 1
            # Avoid call to self.values that can occur in DataFrame
            #  reductions; see GH#28949
            ser = df.iloc[:, 0]

        # We do not get here with UDFs, so we know that our dtype
        #  should always be preserved by the implemented aggregations
        # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
        res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)

        if isinstance(values, Categorical):
            # Because we only get here with known dtype-preserving
            #  reductions, we cast back to Categorical.
            # TODO: if we ever get "rank" working, exclude it here.
            res_values = type(values)._from_sequence(res_values, dtype=values.dtype)

        # If we are DataFrameGroupBy and went through a SeriesGroupByPath
        # then we need to reshape
        # GH#32223 includes case with IntegerArray values, ndarray res_values
        # test_groupby_duplicate_columns with object dtype values
        return ensure_block_shape(res_values, ndim=ndim)

    @final
    def _cython_agg_general(
        self, how: str, alt: Callable, numeric_only: bool, min_count: int = -1
    ):
        # Note: we never get here with how="ohlc" for DataFrameGroupBy;
        #  that goes through SeriesGroupBy

        data = self._get_data_to_aggregate()
        is_ser = data.ndim == 1

        if numeric_only:
            if is_ser and not is_numeric_dtype(self._selected_obj.dtype):
                # GH#41291 match Series behavior
                kwd_name = "numeric_only"
                if how in ["any", "all"]:
                    kwd_name = "bool_only"
                raise NotImplementedError(
                    f"{type(self).__name__}.{how} does not implement {kwd_name}."
                )
            elif not is_ser:
                data = data.get_numeric_data(copy=False)

        def array_func(values: ArrayLike) -> ArrayLike:
            try:
                result = self.grouper._cython_operation(
                    "aggregate", values, how, axis=data.ndim - 1, min_count=min_count
                )
            except NotImplementedError:
                # generally if we have numeric_only=False
                # and non-applicable functions
                # try to python agg
                # TODO: shouldn't min_count matter?
                result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)

            return result

        # TypeError -> we may have an exception in trying to aggregate
        #  continue and exclude the block
        new_mgr = data.grouped_reduce(array_func, ignore_failures=True)

        if not is_ser and len(new_mgr) < len(data):
            warn_dropping_nuisance_columns_deprecated(type(self), how)

        res = self._wrap_agged_manager(new_mgr)
        if is_ser:
            res.index = self.grouper.result_index
            return self._reindex_output(res)
        else:
            return res

    def _cython_transform(
        self, how: str, numeric_only: bool = True, axis: int = 0, **kwargs
    ):
        raise AbstractMethodError(self)

    @final
    def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):

        if maybe_use_numba(engine):
            # TODO: tests with self._selected_obj.ndim == 1 on DataFrameGroupBy
            with self._group_selection_context():
                data = self._selected_obj
            df = data if data.ndim == 2 else data.to_frame()
            result = self._transform_with_numba(
                df, func, *args, engine_kwargs=engine_kwargs, **kwargs
            )
            if self.obj.ndim == 2:
                return cast(DataFrame, self.obj)._constructor(
                    result, index=data.index, columns=data.columns
                )
            else:
                return cast(Series, self.obj)._constructor(
                    result.ravel(), index=data.index, name=data.name
                )

        # optimized transforms
        func = com.get_cython_func(func) or func

        if not isinstance(func, str):
            return self._transform_general(func, *args, **kwargs)

        elif func not in base.transform_kernel_allowlist:
            msg = f"'{func}' is not a valid function name for transform(name)"
            raise ValueError(msg)
        elif func in base.cythonized_kernels or func in base.transformation_kernels:
            # cythonized transform or canned "agg+broadcast"
            return getattr(self, func)(*args, **kwargs)

        else:
            # i.e. func in base.reduction_kernels

            # GH#30918 Use _transform_fast only when we know func is an aggregation
            # If func is a reduction, we need to broadcast the
            # result to the whole group. Compute func result
            # and deal with possible broadcasting below.
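            # For example, df.groupby("g").transform("sum") computes each
            # group's sum once and then repeats it for every row of that group.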
            # Temporarily set observed for dealing with categoricals.
            with com.temp_setattr(self, "observed", True):
                result = getattr(self, func)(*args, **kwargs)

            if self._can_use_transform_fast(result):
                return self._wrap_transform_fast_result(result)

            # only reached for DataFrameGroupBy
            return self._transform_general(func, *args, **kwargs)

    @final
    def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT:
        """
        Fast transform path for aggregations.
        """
        obj = self._obj_with_exclusions

        # for each col, reshape to size of original frame by take operation
        ids, _, _ = self.grouper.group_info
        result = result.reindex(self.grouper.result_index, copy=False)

        if self.obj.ndim == 1:
            # i.e. SeriesGroupBy
            out = algorithms.take_nd(result._values, ids)
            output = obj._constructor(out, index=obj.index, name=obj.name)
        else:
            output = result.take(ids, axis=0)
            output.index = obj.index
        return output

    # -----------------------------------------------------------------
    # Utilities

    @final
    def _apply_filter(self, indices, dropna):
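        # Reached via DataFrameGroupBy/SeriesGroupBy.filter, e.g.
        # df.groupby("g").filter(lambda x: len(x) > 2): with dropna=True the
        # non-qualifying rows are dropped, otherwise they are NaN-masked.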
        if len(indices) == 0:
            indices = np.array([], dtype="int64")
        else:
            indices = np.sort(np.concatenate(indices))
        if dropna:
            filtered = self._selected_obj.take(indices, axis=self.axis)
        else:
            mask = np.empty(len(self._selected_obj.index), dtype=bool)
            mask.fill(False)
            mask[indices.astype(int)] = True
            # mask fails to broadcast when passed to where; broadcast manually.
            mask = np.tile(mask, list(self._selected_obj.shape[1:]) + [1]).T
            filtered = self._selected_obj.where(mask)  # Fill with NaNs.
        return filtered

    @final
    def _cumcount_array(self, ascending: bool = True) -> np.ndarray:
        """
        Parameters
        ----------
        ascending : bool, default True
            If False, number in reverse, from length of group - 1 to 0.

        Notes
        -----
        this is currently implementing sort=False
        (though the default is sort=True) for groupby in general
        """
        ids, _, ngroups = self.grouper.group_info
        sorter = get_group_index_sorter(ids, ngroups)
        ids, count = ids[sorter], len(ids)

        if count == 0:
            return np.empty(0, dtype=np.int64)

        run = np.r_[True, ids[:-1] != ids[1:]]
        rep = np.diff(np.r_[np.nonzero(run)[0], count])
        out = (~run).cumsum()
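        # `run` flags the first element of each group in the sorted ids and
        # `rep` holds each group's length, so subtracting the cumsum's value at
        # the start of each run counts 0, 1, 2, ... within the group.
        # E.g. sorted ids [0, 0, 1]: run = [T, F, T], rep = [2, 1],
        # out = [0, 1, 1] -> ascending cumcount [0, 1, 0].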

        if ascending:
            out -= np.repeat(out[run], rep)
        else:
            out = np.repeat(out[np.r_[run[1:], True]], rep) - out

        rev = np.empty(count, dtype=np.intp)
        rev[sorter] = np.arange(count, dtype=np.intp)
        return out[rev].astype(np.int64, copy=False)

    # -----------------------------------------------------------------

    @final
    @property
    def _obj_1d_constructor(self) -> Callable:
        # GH28330 preserve subclassed Series/DataFrames
        if isinstance(self.obj, DataFrame):
            return self.obj._constructor_sliced
        assert isinstance(self.obj, Series)
        return self.obj._constructor

    @final
    def _bool_agg(self, val_test: Literal["any", "all"], skipna: bool):
        """
        Shared func to call any / all Cython GroupBy implementations.
        """

        def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
            if is_object_dtype(vals.dtype):
                # GH#37501: don't raise on pd.NA when skipna=True
                if skipna:
                    func = np.vectorize(
                        lambda x: bool(x) if not isna(x) else True, otypes=[bool]
                    )
                    vals = func(vals)
                else:
                    vals = vals.astype(bool, copy=False)

                vals = cast(np.ndarray, vals)
            elif isinstance(vals, BaseMaskedArray):
                vals = vals._data.astype(bool, copy=False)
            else:
                vals = vals.astype(bool, copy=False)

            return vals.view(np.int8), bool

        def result_to_bool(
            result: np.ndarray,
            inference: type,
            nullable: bool = False,
        ) -> ArrayLike:
            if nullable:
                return BooleanArray(result.astype(bool, copy=False), result == -1)
            else:
                return result.astype(inference, copy=False)

        return self._get_cythonized_result(
            libgroupby.group_any_all,
            numeric_only=False,
            cython_dtype=np.dtype(np.int8),
            needs_mask=True,
            needs_nullable=True,
            pre_processing=objs_to_bool,
            post_processing=result_to_bool,
            val_test=val_test,
            skipna=skipna,
        )

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def any(self, skipna: bool = True):
        """
        Return True if any value in the group is truthy, else False.

        Parameters
        ----------
        skipna : bool, default True
            Flag to ignore nan values during truth testing.

        Returns
        -------
        Series or DataFrame
            DataFrame or Series of boolean values, where a value is True if any element
            is True within its respective group, False otherwise.
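
        Examples
        --------
        For each group, test whether any value is truthy:

        >>> ser = pd.Series([0, 1, 0], index=["a", "a", "b"])
        >>> ser.groupby(level=0).any()
        a     True
        b    False
        dtype: bool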
        """
        return self._bool_agg("any", skipna)

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def all(self, skipna: bool = True):
        """
        Return True if all values in the group are truthy, else False.

        Parameters
        ----------
        skipna : bool, default True
            Flag to ignore nan values during truth testing.

        Returns
        -------
        Series or DataFrame
            DataFrame or Series of boolean values, where a value is True if all elements
            are True within its respective group, False otherwise.
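
        Examples
        --------
        For each group, test whether every value is truthy:

        >>> ser = pd.Series([1, 2, 0], index=["a", "a", "b"])
        >>> ser.groupby(level=0).all()
        a     True
        b    False
        dtype: bool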
        """
        return self._bool_agg("all", skipna)

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def count(self) -> Series | DataFrame:
        """
        Compute count of group, excluding missing values.

        Returns
        -------
        Series or DataFrame
            Count of values within each group.
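
        Examples
        --------
        Missing values are excluded from the count:

        >>> df = pd.DataFrame({"A": [1, 1, 2], "B": [np.nan, 2, 3]})
        >>> df.groupby("A").count()
           B
        A
        1  1
        2  1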
        """
        data = self._get_data_to_aggregate()
        ids, _, ngroups = self.grouper.group_info
        mask = ids != -1

        is_series = data.ndim == 1

        def hfunc(bvalues: ArrayLike) -> ArrayLike:
            # TODO(EA2D): reshape would not be necessary with 2D EAs
            if bvalues.ndim == 1:
                # EA
                masked = mask & ~isna(bvalues).reshape(1, -1)
            else:
                masked = mask & ~isna(bvalues)

            counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
            if is_series:
                assert counted.ndim == 2
                assert counted.shape[0] == 1
                return counted[0]
            return counted

        new_mgr = data.grouped_reduce(hfunc)

        # If we are grouping on categoricals we want unobserved categories to
        # return zero, rather than the default of NaN which the reindexing in
        # _wrap_agged_manager() returns. GH 35028
        with com.temp_setattr(self, "observed", True):
            result = self._wrap_agged_manager(new_mgr)

        if result.ndim == 1:
            result.index = self.grouper.result_index

        return self._reindex_output(result, fill_value=0)

    @final
    @Substitution(name="groupby")
    @Substitution(see_also=_common_see_also)
    def mean(
        self,
        numeric_only: bool | lib.NoDefault = lib.no_default,
        engine: str = "cython",
        engine_kwargs: dict[str, bool] | None = None,
    ):
        """
        Compute mean of groups, excluding missing values.

        Parameters
        ----------
        numeric_only : bool, default True
            Include only float, int, boolean columns. If None, will attempt to use
            everything, then use only numeric data.

        engine : str, default 'cython'
            * ``'cython'`` : Runs the operation through C-extensions from cython.
            * ``'numba'`` : Runs the operation through JIT compiled code from numba.
            * ``None`` : Defaults to ``'cython'`` or the global setting
              ``compute.use_numba``

            .. versionadded:: 1.4.0

        engine_kwargs : dict, default None
            * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
              and ``parallel`` dictionary keys. The values must either be ``True`` or
              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
              ``{{'nopython': True, 'nogil': False, 'parallel': False}}``

            .. versionadded:: 1.4.0

        Returns
        -------
        pandas.Series or pandas.DataFrame
        %(see_also)s
        Examples
        --------
        >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2],
        ...                    'B': [np.nan, 2, 3, 4, 5],
        ...                    'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C'])

        Groupby one column and return the mean of the remaining columns in
        each group.

        >>> df.groupby('A').mean()
             B         C
        A
        1  3.0  1.333333
        2  4.0  1.500000

        Groupby two columns and return the mean of the remaining column.

        >>> df.groupby(['A', 'B']).mean()
                 C
        A B
        1 2.0  2.0
          4.0  1.0
        2 3.0  1.0
          5.0  2.0

        Groupby one column and return the mean of only a particular column in
        each group.

        >>> df.groupby('A')['B'].mean()
        A
        1    3.0
        2    4.0
        Name: B, dtype: float64
        """
        numeric_only_bool = self._resolve_numeric_only(numeric_only)

        if maybe_use_numba(engine):
            from pandas.core._numba.kernels import sliding_mean

            return self._numba_agg_general(sliding_mean, engine_kwargs, "groupby_mean")
        else:
            result = self._cython_agg_general(
                "mean",
                alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool),
                numeric_only=numeric_only_bool,
            )
            return result.__finalize__(self.obj, method="groupby")

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def median(self, numeric_only: bool | lib.NoDefault = lib.no_default):
        """
        Compute median of groups, excluding missing values.

        For multiple groupings, the result index will be a MultiIndex.

        Parameters
        ----------
        numeric_only : bool, default True
            Include only float, int, boolean columns. If None, will attempt to use
            everything, then use only numeric data.

        Returns
        -------
        Series or DataFrame
            Median of values within each group.
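
        Examples
        --------
        >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=["a", "a", "a", "b", "b", "b"])
        >>> ser.groupby(level=0).median()
        a    7.0
        b    3.0
        dtype: float64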
        """
        numeric_only_bool = self._resolve_numeric_only(numeric_only)

        result = self._cython_agg_general(
            "median",
            alt=lambda x: Series(x).median(numeric_only=numeric_only_bool),
            numeric_only=numeric_only_bool,
        )
        return result.__finalize__(self.obj, method="groupby")

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def std(
        self,
        ddof: int = 1,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        """
        Compute standard deviation of groups, excluding missing values.

        For multiple groupings, the result index will be a MultiIndex.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.

        engine : str, default None
            * ``'cython'`` : Runs the operation through C-extensions from cython.
            * ``'numba'`` : Runs the operation through JIT compiled code from numba.
            * ``None`` : Defaults to ``'cython'`` or the global setting
              ``compute.use_numba``

            .. versionadded:: 1.4.0

        engine_kwargs : dict, default None
            * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
              and ``parallel`` dictionary keys. The values must either be ``True`` or
              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
              ``{{'nopython': True, 'nogil': False, 'parallel': False}}``

            .. versionadded:: 1.4.0

        Returns
        -------
        Series or DataFrame
            Standard deviation of values within each group.
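
        Examples
        --------
        >>> ser = pd.Series([1, 3, 5, 7], index=["a", "a", "b", "b"])
        >>> ser.groupby(level=0).std()
        a    1.414214
        b    1.414214
        dtype: float64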
        """
        if maybe_use_numba(engine):
            from pandas.core._numba.kernels import sliding_var

            return np.sqrt(
                self._numba_agg_general(sliding_var, engine_kwargs, "groupby_std", ddof)
            )
        else:
            return self._get_cythonized_result(
                libgroupby.group_var,
                needs_counts=True,
                cython_dtype=np.dtype(np.float64),
                post_processing=lambda vals, inference: np.sqrt(vals),
                ddof=ddof,
            )

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def var(
        self,
        ddof: int = 1,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        """
        Compute variance of groups, excluding missing values.

        For multiple groupings, the result index will be a MultiIndex.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.

        engine : str, default None
            * ``'cython'`` : Runs the operation through C-extensions from cython.
            * ``'numba'`` : Runs the operation through JIT compiled code from numba.
            * ``None`` : Defaults to ``'cython'`` or the global setting
              ``compute.use_numba``

            .. versionadded:: 1.4.0

        engine_kwargs : dict, default None
            * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
              and ``parallel`` dictionary keys. The values must either be ``True`` or
              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
              ``{{'nopython': True, 'nogil': False, 'parallel': False}}``

            .. versionadded:: 1.4.0

        Returns
        -------
        Series or DataFrame
            Variance of values within each group.
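
        Examples
        --------
        >>> ser = pd.Series([1, 3, 5, 7], index=["a", "a", "b", "b"])
        >>> ser.groupby(level=0).var()
        a    2.0
        b    2.0
        dtype: float64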
        """
        if maybe_use_numba(engine):
            from pandas.core._numba.kernels import sliding_var

            return self._numba_agg_general(
                sliding_var, engine_kwargs, "groupby_var", ddof
            )
        else:
            if ddof == 1:
                numeric_only = self._resolve_numeric_only(lib.no_default)
                return self._cython_agg_general(
                    "var",
                    alt=lambda x: Series(x).var(ddof=ddof),
                    numeric_only=numeric_only,
                )
            else:
                func = lambda x: x.var(ddof=ddof)
                with self._group_selection_context():
                    return self._python_agg_general(func)

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def sem(self, ddof: int = 1):
        """
        Compute standard error of the mean of groups, excluding missing values.

        For multiple groupings, the result index will be a MultiIndex.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.

        Returns
        -------
        Series or DataFrame
            Standard error of the mean of values within each group.
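
        Examples
        --------
        >>> ser = pd.Series([1, 3, 5, 7], index=["a", "a", "b", "b"])
        >>> ser.groupby(level=0).sem()
        a    1.0
        b    1.0
        dtype: float64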
        """
        result = self.std(ddof=ddof)
        if result.ndim == 1:
            result /= np.sqrt(self.count())
        else:
            cols = result.columns.difference(self.exclusions).unique()
            counts = self.count()
            result_ilocs = result.columns.get_indexer_for(cols)
            count_ilocs = counts.columns.get_indexer_for(cols)
            result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
        return result

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def size(self) -> DataFrame | Series:
        """
        Compute group sizes.

        Returns
        -------
        DataFrame or Series
            Number of rows in each group as a Series if as_index is True
            or a DataFrame if as_index is False.
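
        Examples
        --------
        >>> ser = pd.Series([1, 2, 3], index=["a", "a", "b"])
        >>> ser.groupby(level=0).size()
        a    2
        b    1
        dtype: int64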
        """
        result = self.grouper.size()

        # GH28330 preserve subclassed Series/DataFrames through calls
        if isinstance(self.obj, Series):
            result = self._obj_1d_constructor(result, name=self.obj.name)
        else:
            result = self._obj_1d_constructor(result)

        if not self.as_index:
            # Item "None" of "Optional[Series]" has no attribute "reset_index"
            result = result.rename("size").reset_index()  # type: ignore[union-attr]

        return self._reindex_output(result, fill_value=0)

    @final
    @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)
    def sum(
        self,
        numeric_only: bool | lib.NoDefault = lib.no_default,
        min_count: int = 0,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            from pandas.core._numba.kernels import sliding_sum

            return self._numba_agg_general(
                sliding_sum,
                engine_kwargs,
                "groupby_sum",
            )
        else:
            numeric_only = self._resolve_numeric_only(numeric_only)

            # If we are grouping on categoricals we want unobserved categories to
            # return zero, rather than the default of NaN which the reindexing in
            # _agg_general() returns. GH #31422
            with com.temp_setattr(self, "observed", True):
                result = self._agg_general(
                    numeric_only=numeric_only,
                    min_count=min_count,
                    alias="add",
                    npfunc=np.sum,
                )

            return self._reindex_output(result, fill_value=0)

    @final
    @doc(_groupby_agg_method_template, fname="prod", no=True, mc=0)
    def prod(
        self, numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0
    ):
        numeric_only = self._resolve_numeric_only(numeric_only)

        return self._agg_general(
            numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
        )

    @final
    @doc(_groupby_agg_method_template, fname="min", no=False, mc=-1)
    def min(self, numeric_only: bool = False, min_count: int = -1):
        return self._agg_general(
            numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min
        )

    @final
    @doc(_groupby_agg_method_template, fname="max", no=False, mc=-1)
    def max(self, numeric_only: bool = False, min_count: int = -1):
        return self._agg_general(
            numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max
        )

    @final
    @doc(_groupby_agg_method_template, fname="first", no=False, mc=-1)
    def first(self, numeric_only: bool = False, min_count: int = -1):
        def first_compat(obj: NDFrameT, axis: int = 0):
            def first(x: Series):
                """Helper function for the first item that isn't NA."""
                arr = x.array[notna(x.array)]
                if not len(arr):
                    return np.nan
                return arr[0]

            if isinstance(obj, DataFrame):
                return obj.apply(first, axis=axis)
            elif isinstance(obj, Series):
                return first(obj)
            else:  # pragma: no cover
                raise TypeError(type(obj))

        return self._agg_general(
            numeric_only=numeric_only,
            min_count=min_count,
            alias="first",
            npfunc=first_compat,
        )

    @final
    @doc(_groupby_agg_method_template, fname="last", no=False, mc=-1)
    def last(self, numeric_only: bool = False, min_count: int = -1):
        def last_compat(obj: NDFrameT, axis: int = 0):
            def last(x: Series):
                """Helper function for the last item that isn't NA."""
                arr = x.array[notna(x.array)]
                if not len(arr):
                    return np.nan
                return arr[-1]

            if isinstance(obj, DataFrame):
                return obj.apply(last, axis=axis)
            elif isinstance(obj, Series):
                return last(obj)
            else:  # pragma: no cover
                raise TypeError(type(obj))

        return self._agg_general(
            numeric_only=numeric_only,
            min_count=min_count,
            alias="last",
            npfunc=last_compat,
        )

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def ohlc(self) -> DataFrame:
        """
        Compute open, high, low and close values of a group, excluding missing values.

        For multiple groupings, the result index will be a MultiIndex.

        Returns
        -------
        DataFrame
            Open, high, low and close values within each group.
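
        Examples
        --------
        >>> ser = pd.Series([1, 3, 2, 4, 3, 5], index=["a", "a", "a", "b", "b", "b"])
        >>> ser.groupby(level=0).ohlc()
           open  high  low  close
        a     1     3    1      2
        b     4     5    3      5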
 | 
						||
        """
 | 
						||
        if self.obj.ndim == 1:
 | 
						||
            # self._iterate_slices() yields only self._selected_obj
 | 
						||
            obj = self._selected_obj
 | 
						||
 | 
						||
            is_numeric = is_numeric_dtype(obj.dtype)
 | 
						||
            if not is_numeric:
 | 
						||
                raise DataError("No numeric types to aggregate")
 | 
						||
 | 
						||
            res_values = self.grouper._cython_operation(
 | 
						||
                "aggregate", obj._values, "ohlc", axis=0, min_count=-1
 | 
						||
            )
 | 
						||
 | 
						||
            agg_names = ["open", "high", "low", "close"]
 | 
						||
            result = self.obj._constructor_expanddim(
 | 
						||
                res_values, index=self.grouper.result_index, columns=agg_names
 | 
						||
            )
 | 
						||
            return self._reindex_output(result)
 | 
						||
 | 
						||
        return self._apply_to_column_groupbys(
 | 
						||
            lambda x: x.ohlc(), self._obj_with_exclusions
 | 
						||
        )
 | 
						||
 | 
						||
    @doc(DataFrame.describe)
 | 
						||
    def describe(self, **kwargs):
 | 
						||
        with self._group_selection_context():
 | 
						||
            result = self.apply(lambda x: x.describe(**kwargs))
 | 
						||
            if self.axis == 1:
 | 
						||
                return result.T
 | 
						||
            return result.unstack()
 | 
						||
 | 
						||
    @final
 | 
						||
    def resample(self, rule, *args, **kwargs):
 | 
						||
        """
 | 
						||
        Provide resampling when using a TimeGrouper.
 | 
						||
 | 
						||
        Given a grouper, the function resamples it according to a string
 | 
						||
        "string" -> "frequency".
 | 
						||
 | 
						||
        See the :ref:`frequency aliases <timeseries.offset_aliases>`
 | 
						||
        documentation for more details.
 | 
						||
 | 
						||
        Parameters
 | 
						||
        ----------
 | 
						||
        rule : str or DateOffset
 | 
						||
            The offset string or object representing target grouper conversion.
 | 
						||
        *args, **kwargs
 | 
						||
            Possible arguments are `how`, `fill_method`, `limit`, `kind` and
 | 
						||
            `on`, and other arguments of `TimeGrouper`.
 | 
						||
 | 
						||
        Returns
 | 
						||
        -------
 | 
						||
        Grouper
 | 
						||
            Return a new grouper with our resampler appended.
 | 
						||
 | 
						||
        See Also
 | 
						||
        --------
 | 
						||
        Grouper : Specify a frequency to resample with when
 | 
						||
            grouping by a key.
 | 
						||
        DatetimeIndex.resample : Frequency conversion and resampling of
 | 
						||
            time series.
 | 
						||
 | 
						||
        Examples
 | 
						||
        --------
 | 
						||
        >>> idx = pd.date_range('1/1/2000', periods=4, freq='T')
 | 
						||
        >>> df = pd.DataFrame(data=4 * [range(2)],
 | 
						||
        ...                   index=idx,
 | 
						||
        ...                   columns=['a', 'b'])
 | 
						||
        >>> df.iloc[2, 0] = 5
 | 
						||
        >>> df
 | 
						||
                            a  b
 | 
						||
        2000-01-01 00:00:00  0  1
 | 
						||
        2000-01-01 00:01:00  0  1
 | 
						||
        2000-01-01 00:02:00  5  1
 | 
						||
        2000-01-01 00:03:00  0  1
 | 
						||
 | 
						||
        Downsample the DataFrame into 3 minute bins and sum the values of
 | 
						||
        the timestamps falling into a bin.
 | 
						||
 | 
						||
        >>> df.groupby('a').resample('3T').sum()
 | 
						||
                                 a  b
 | 
						||
        a
 | 
						||
        0   2000-01-01 00:00:00  0  2
 | 
						||
            2000-01-01 00:03:00  0  1
 | 
						||
        5   2000-01-01 00:00:00  5  1
 | 
						||
 | 
						||
        Upsample the series into 30 second bins.
 | 
						||
 | 
						||
        >>> df.groupby('a').resample('30S').sum()
 | 
						||
                            a  b
 | 
						||
        a
 | 
						||
        0   2000-01-01 00:00:00  0  1
 | 
						||
            2000-01-01 00:00:30  0  0
 | 
						||
            2000-01-01 00:01:00  0  1
 | 
						||
            2000-01-01 00:01:30  0  0
 | 
						||
            2000-01-01 00:02:00  0  0
 | 
						||
            2000-01-01 00:02:30  0  0
 | 
						||
            2000-01-01 00:03:00  0  1
 | 
						||
        5   2000-01-01 00:02:00  5  1
 | 
						||
 | 
						||
        Resample by month. Values are assigned to the month of the period.
 | 
						||
 | 
						||
        >>> df.groupby('a').resample('M').sum()
 | 
						||
                    a  b
 | 
						||
        a
 | 
						||
        0   2000-01-31  0  3
 | 
						||
        5   2000-01-31  5  1
 | 
						||
 | 
						||
        Downsample the series into 3 minute bins as above, but close the right
 | 
						||
        side of the bin interval.
 | 
						||
 | 
						||
        >>> df.groupby('a').resample('3T', closed='right').sum()
 | 
						||
                                 a  b
 | 
						||
        a
 | 
						||
        0   1999-12-31 23:57:00  0  1
 | 
						||
            2000-01-01 00:00:00  0  2
 | 
						||
        5   2000-01-01 00:00:00  5  1
 | 
						||
 | 
						||
        Downsample the series into 3 minute bins and close the right side of
 | 
						||
        the bin interval, but label each bin using the right edge instead of
 | 
						||
        the left.
 | 
						||
 | 
						||
        >>> df.groupby('a').resample('3T', closed='right', label='right').sum()
 | 
						||
                                 a  b
 | 
						||
        a
 | 
						||
        0   2000-01-01 00:00:00  0  1
 | 
						||
            2000-01-01 00:03:00  0  2
 | 
						||
        5   2000-01-01 00:03:00  5  1
 | 
						||
        """
 | 
						||
        from pandas.core.resample import get_resampler_for_grouping
 | 
						||
 | 
						||
        return get_resampler_for_grouping(self, rule, *args, **kwargs)
 | 
						||
 | 
						||
    @final
 | 
						||
    @Substitution(name="groupby")
 | 
						||
    @Appender(_common_see_also)
 | 
						||
    def rolling(self, *args, **kwargs):
 | 
						||
        """
 | 
						||
        Return a rolling grouper, providing rolling functionality per group.
 | 
						||
        """
 | 
						||
        from pandas.core.window import RollingGroupby
 | 
						||
 | 
						||
        return RollingGroupby(
 | 
						||
            self._selected_obj,
 | 
						||
            *args,
 | 
						||
            _grouper=self.grouper,
 | 
						||
            _as_index=self.as_index,
 | 
						||
            **kwargs,
 | 
						||
        )
 | 
						||
 | 
						||
    @final
 | 
						||
    @Substitution(name="groupby")
 | 
						||
    @Appender(_common_see_also)
 | 
						||
    def expanding(self, *args, **kwargs):
 | 
						||
        """
 | 
						||
        Return an expanding grouper, providing expanding
 | 
						||
        functionality per group.
 | 
						||
        """
 | 
						||
        from pandas.core.window import ExpandingGroupby
 | 
						||
 | 
						||
        return ExpandingGroupby(
 | 
						||
            self._selected_obj,
 | 
						||
            *args,
 | 
						||
            _grouper=self.grouper,
 | 
						||
            **kwargs,
 | 
						||
        )
 | 
						||
 | 
						||
    @final
 | 
						||
    @Substitution(name="groupby")
 | 
						||
    @Appender(_common_see_also)
 | 
						||
    def ewm(self, *args, **kwargs):
 | 
						||
        """
 | 
						||
        Return an ewm grouper, providing ewm functionality per group.
 | 
						||
        """
 | 
						||
        from pandas.core.window import ExponentialMovingWindowGroupby
 | 
						||
 | 
						||
        return ExponentialMovingWindowGroupby(
 | 
						||
            self._selected_obj,
 | 
						||
            *args,
 | 
						||
            _grouper=self.grouper,
 | 
						||
            **kwargs,
 | 
						||
        )
 | 
						||
 | 
						||
    @final
 | 
						||
    def _fill(self, direction: Literal["ffill", "bfill"], limit=None):
 | 
						||
        """
 | 
						||
        Shared function for `pad` and `backfill` to call Cython method.
 | 
						||
 | 
						||
        Parameters
 | 
						||
        ----------
 | 
						||
        direction : {'ffill', 'bfill'}
 | 
						||
            Direction passed to underlying Cython function. `bfill` will cause
 | 
						||
            values to be filled backwards. `ffill` and any other values will
 | 
						||
            default to a forward fill
 | 
						||
        limit : int, default None
 | 
						||
            Maximum number of consecutive values to fill. If `None`, this
 | 
						||
            method will convert to -1 prior to passing to Cython
 | 
						||
 | 
						||
        Returns
 | 
						||
        -------
 | 
						||
        `Series` or `DataFrame` with filled values
 | 
						||
 | 
						||
        See Also
 | 
						||
        --------
 | 
						||
        pad : Returns Series with minimum number of char in object.
 | 
						||
        backfill : Backward fill the missing values in the dataset.
 | 
						||
        """
 | 
						||
        # Need int value for Cython
        if limit is None:
            limit = -1

        ids, _, _ = self.grouper.group_info
        sorted_labels = np.argsort(ids, kind="mergesort").astype(np.intp, copy=False)
        if direction == "bfill":
            sorted_labels = sorted_labels[::-1]

        col_func = partial(
            libgroupby.group_fillna_indexer,
            labels=ids,
            sorted_labels=sorted_labels,
            direction=direction,
            limit=limit,
            dropna=self.dropna,
        )

        def blk_func(values: ArrayLike) -> ArrayLike:
            mask = isna(values)
            if values.ndim == 1:
                indexer = np.empty(values.shape, dtype=np.intp)
                col_func(out=indexer, mask=mask)
                return algorithms.take_nd(values, indexer)

            else:
                # We broadcast algorithms.take_nd analogous to
                #  np.take_along_axis

                # Note: we only get here with backfill/pad,
                #  so if we have a dtype that cannot hold NAs,
                #  then there will be no -1s in indexer, so we can use
                #  the original dtype (no need to ensure_dtype_can_hold_na)
                if isinstance(values, np.ndarray):
                    out = np.empty(values.shape, dtype=values.dtype)
                else:
                    out = type(values)._empty(values.shape, dtype=values.dtype)

                for i in range(len(values)):
                    # call group_fillna_indexer column-wise
                    indexer = np.empty(values.shape[1], dtype=np.intp)
                    col_func(out=indexer, mask=mask[i])
                    out[i, :] = algorithms.take_nd(values[i], indexer)
                return out

        obj = self._obj_with_exclusions
        if self.axis == 1:
            obj = obj.T
        mgr = obj._mgr
        res_mgr = mgr.apply(blk_func)

        new_obj = obj._constructor(res_mgr)
        if isinstance(new_obj, Series):
            new_obj.name = obj.name

        return self._wrap_transformed_output(new_obj)

    @final
    @Substitution(name="groupby")
    def ffill(self, limit=None):
        """
        Forward fill the values.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        Series or DataFrame
            Object with missing values filled.

        See Also
        --------
        Series.ffill: Forward fill the missing values in the dataset.
        DataFrame.ffill: Object with missing values filled or None if inplace=True.
        Series.fillna: Fill NaN values of a Series.
        DataFrame.fillna: Fill NaN values of a DataFrame.
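
        Examples
        --------
        A minimal sketch of per-group forward filling (illustrative data;
        values propagate only within their own group, so the leading ``NaN``
        of the first group is left as-is):

        >>> df = pd.DataFrame({"key": [0, 0, 1, 1], "val": [np.nan, 2, 3, np.nan]})
        >>> df.groupby("key").ffill()
           val
        0  NaN
        1  2.0
        2  3.0
        3  3.0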
        """
 | 
						||
        return self._fill("ffill", limit=limit)
 | 
						||
 | 
						||
    def pad(self, limit=None):
 | 
						||
        warnings.warn(
 | 
						||
            "pad is deprecated and will be removed in a future version. "
 | 
						||
            "Use ffill instead.",
 | 
						||
            FutureWarning,
 | 
						||
            stacklevel=find_stack_level(),
 | 
						||
        )
 | 
						||
        return self.ffill(limit=limit)
 | 
						||
 | 
						||
    pad.__doc__ = ffill.__doc__
 | 
						||
 | 
						||
    @final
 | 
						||
    @Substitution(name="groupby")
 | 
						||
    def bfill(self, limit=None):
 | 
						||
        """
 | 
						||
        Backward fill the values.
 | 
						||
 | 
						||
        Parameters
 | 
						||
        ----------
 | 
						||
        limit : int, optional
 | 
						||
            Limit of how many values to fill.
 | 
						||
 | 
						||
        Returns
 | 
						||
        -------
 | 
						||
        Series or DataFrame
 | 
						||
            Object with missing values filled.
 | 
						||
 | 
						||
        See Also
 | 
						||
        --------
 | 
						||
        Series.bfill :  Backward fill the missing values in the dataset.
 | 
						||
        DataFrame.bfill:  Backward fill the missing values in the dataset.
 | 
						||
        Series.fillna: Fill NaN values of a Series.
 | 
						||
        DataFrame.fillna: Fill NaN values of a DataFrame.
 | 
						||
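
        Examples
        --------
        A minimal sketch of per-group backward filling (illustrative data;
        values propagate only within their own group, so the trailing ``NaN``
        of the last group is left as-is):

        >>> df = pd.DataFrame({"key": [0, 0, 1, 1], "val": [np.nan, 2, 3, np.nan]})
        >>> df.groupby("key").bfill()
           val
        0  2.0
        1  2.0
        2  3.0
        3  NaN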
        """
 | 
						||
        return self._fill("bfill", limit=limit)
 | 
						||
 | 
						||
    def backfill(self, limit=None):
 | 
						||
        warnings.warn(
 | 
						||
            "backfill is deprecated and will be removed in a future version. "
 | 
						||
            "Use bfill instead.",
 | 
						||
            FutureWarning,
 | 
						||
            stacklevel=find_stack_level(),
 | 
						||
        )
 | 
						||
        return self.bfill(limit=limit)
 | 
						||
 | 
						||
    backfill.__doc__ = bfill.__doc__
 | 
						||
 | 
						||
    @final
    @Substitution(name="groupby")
    @Substitution(see_also=_common_see_also)
    def nth(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> NDFrameT:
        """
        Take the nth row from each group if n is an int, otherwise a subset of rows.

        Can be either a call or an index. dropna is not available with index notation.
        Index notation accepts a comma separated list of integers and slices.

        If ``dropna``, will take the nth non-null row. ``dropna`` is either
        'all' or 'any'; this is equivalent to calling dropna(how=dropna)
        before the groupby.

        Parameters
        ----------
        n : int, slice or list of ints and slices
            A single nth value for the row or a list of nth values or slices.

            .. versionchanged:: 1.4.0
                Added slice and lists containing slices.
                Added index notation.

        dropna : {'any', 'all', None}, default None
            Apply the specified dropna operation before counting which row is
            the nth row. Only supported if n is an int.

        Returns
        -------
        Series or DataFrame
            N-th value within each group.
        %(see_also)s
        Examples
        --------

        >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2],
        ...                    'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B'])
        >>> g = df.groupby('A')
        >>> g.nth(0)
             B
        A
        1  NaN
        2  3.0
        >>> g.nth(1)
             B
        A
        1  2.0
        2  5.0
        >>> g.nth(-1)
             B
        A
        1  4.0
        2  5.0
        >>> g.nth([0, 1])
             B
        A
        1  NaN
        1  2.0
        2  3.0
        2  5.0
        >>> g.nth(slice(None, -1))
             B
        A
        1  NaN
        1  2.0
        2  3.0

        Index notation may also be used

        >>> g.nth[0, 1]
             B
        A
        1  NaN
        1  2.0
        2  3.0
        2  5.0
        >>> g.nth[:-1]
             B
        A
        1  NaN
        1  2.0
        2  3.0

        Specifying ``dropna`` allows ignoring ``NaN`` values when counting

        >>> g.nth(0, dropna='any')
             B
        A
        1  2.0
        2  3.0

        When using ``dropna``, NaNs denote groups exhausted before the nth row

        >>> g.nth(3, dropna='any')
            B
        A
        1 NaN
        2 NaN

        Specifying `as_index=False` in `groupby` keeps the original index.

        >>> df.groupby('A', as_index=False).nth(1)
           A    B
        1  1  2.0
        4  2  5.0
        """
        if not dropna:
            with self._group_selection_context():
                mask = self._make_mask_from_positional_indexer(n)

                ids, _, _ = self.grouper.group_info

                # Drop NA values in grouping
                mask = mask & (ids != -1)

                out = self._mask_selected_obj(mask)
                if not self.as_index:
                    return out

                result_index = self.grouper.result_index
                if self.axis == 0:
                    out.index = result_index[ids[mask]]
                    if not self.observed and isinstance(result_index, CategoricalIndex):
                        out = out.reindex(result_index)

                    out = self._reindex_output(out)
                else:
                    out.columns = result_index[ids[mask]]

                return out.sort_index(axis=self.axis) if self.sort else out

        # dropna is truthy
        if not is_integer(n):
            raise ValueError("dropna option only supported for an integer argument")

        if dropna not in ["any", "all"]:
            # Note: when agg-ing picker doesn't raise this, just returns NaN
            raise ValueError(
                "For a DataFrame or Series groupby.nth, dropna must be "
                "either None, 'any' or 'all', "
                f"(was passed {dropna})."
            )

        # old behaviour, but with all and any support for DataFrames.
        # modified in GH 7559 to have better perf
        n = cast(int, n)
        max_len = n if n >= 0 else -1 - n
        dropped = self.obj.dropna(how=dropna, axis=self.axis)

        # get a new grouper for our dropped obj
        if self.keys is None and self.level is None:

            # we don't have the grouper info available
            # (e.g. we have selected out
            # a column that is not in the current object)
            axis = self.grouper.axis
            grouper = axis[axis.isin(dropped.index)]

        else:

            # create a grouper with the original parameters, but on dropped
            # object
            from pandas.core.groupby.grouper import get_grouper

            grouper, _, _ = get_grouper(
                dropped,
                key=self.keys,
                axis=self.axis,
                level=self.level,
                sort=self.sort,
                mutated=self.mutated,
            )

        grb = dropped.groupby(
            grouper, as_index=self.as_index, sort=self.sort, axis=self.axis
        )
        sizes, result = grb.size(), grb.nth(n)
        mask = (sizes < max_len)._values

        # set the results which don't meet the criteria
        if len(result) and mask.any():
            result.loc[mask] = np.nan

        # reset/reindex to the original groups
        if len(self.obj) == len(dropped) or len(result) == len(
            self.grouper.result_index
        ):
            result.index = self.grouper.result_index
        else:
            result = result.reindex(self.grouper.result_index)

        return result

    @final
    def quantile(self, q=0.5, interpolation: str = "linear"):
        """
        Return group values at the given quantile, a la numpy.percentile.

        Parameters
        ----------
        q : float or array-like, default 0.5 (50% quantile)
            Value(s) between 0 and 1 providing the quantile(s) to compute.
        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
            Method to use when the desired quantile falls between two points.

        Returns
        -------
        Series or DataFrame
            Return type determined by caller of GroupBy object.

        See Also
        --------
        Series.quantile : Similar method for Series.
        DataFrame.quantile : Similar method for DataFrame.
        numpy.percentile : NumPy method to compute qth percentile.

        Examples
        --------
        >>> df = pd.DataFrame([
        ...     ['a', 1], ['a', 2], ['a', 3],
        ...     ['b', 1], ['b', 3], ['b', 5]
        ... ], columns=['key', 'val'])
        >>> df.groupby('key').quantile()
            val
        key
        a    2.0
        b    3.0
        """

        def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]:
            if is_object_dtype(vals):
                raise TypeError(
                    "'quantile' cannot be performed against 'object' dtypes!"
                )

            inference: np.dtype | None = None
            if is_integer_dtype(vals.dtype):
                if isinstance(vals, ExtensionArray):
                    out = vals.to_numpy(dtype=float, na_value=np.nan)
                else:
                    out = vals
                inference = np.dtype(np.int64)
            elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
                out = vals.to_numpy(dtype=float, na_value=np.nan)
            elif is_datetime64_dtype(vals.dtype):
                inference = np.dtype("datetime64[ns]")
                out = np.asarray(vals).astype(float)
            elif is_timedelta64_dtype(vals.dtype):
                inference = np.dtype("timedelta64[ns]")
                out = np.asarray(vals).astype(float)
            elif isinstance(vals, ExtensionArray) and is_float_dtype(vals):
                inference = np.dtype(np.float64)
                out = vals.to_numpy(dtype=float, na_value=np.nan)
            else:
                out = np.asarray(vals)

            return out, inference

        def post_processor(vals: np.ndarray, inference: np.dtype | None) -> np.ndarray:
            if inference:
                # Check for edge case
                if not (
                    is_integer_dtype(inference)
                    and interpolation in {"linear", "midpoint"}
                ):
                    vals = vals.astype(inference)

            return vals

        orig_scalar = is_scalar(q)
        if orig_scalar:
            q = [q]

        qs = np.array(q, dtype=np.float64)
        ids, _, ngroups = self.grouper.group_info
        nqs = len(qs)

        func = partial(
            libgroupby.group_quantile, labels=ids, qs=qs, interpolation=interpolation
        )

        # Put '-1' (NaN) labels as the last group so it does not interfere
        # with the calculations. Note: length check avoids failure on empty
        # labels. In that case, the value doesn't matter
        na_label_for_sorting = ids.max() + 1 if len(ids) > 0 else 0
        labels_for_lexsort = np.where(ids == -1, na_label_for_sorting, ids)

        def blk_func(values: ArrayLike) -> ArrayLike:
            mask = isna(values)
            vals, inference = pre_processor(values)

            ncols = 1
            if vals.ndim == 2:
                ncols = vals.shape[0]
                shaped_labels = np.broadcast_to(
                    labels_for_lexsort, (ncols, len(labels_for_lexsort))
                )
            else:
                shaped_labels = labels_for_lexsort

            out = np.empty((ncols, ngroups, nqs), dtype=np.float64)

            # Get an index of values sorted by values and then labels
            order = (vals, shaped_labels)
            sort_arr = np.lexsort(order).astype(np.intp, copy=False)

            if vals.ndim == 1:
                func(out[0], values=vals, mask=mask, sort_indexer=sort_arr)
            else:
                for i in range(ncols):
                    func(out[i], values=vals[i], mask=mask[i], sort_indexer=sort_arr[i])

            if vals.ndim == 1:
                out = out.ravel("K")
            else:
                out = out.reshape(ncols, ngroups * nqs)
            return post_processor(out, inference)

        obj = self._obj_with_exclusions
        is_ser = obj.ndim == 1
        mgr = self._get_data_to_aggregate()

        res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)
        if not is_ser and len(res_mgr.items) != len(mgr.items):
            warn_dropping_nuisance_columns_deprecated(type(self), "quantile")

            if len(res_mgr.items) == 0:
                # re-call grouped_reduce to get the desired exception message
                mgr.grouped_reduce(blk_func, ignore_failures=False)
                # grouped_reduce _should_ raise, so this should not be reached
                raise TypeError(  # pragma: no cover
                    "All columns were dropped in grouped_reduce"
                )

        if is_ser:
            res = self._wrap_agged_manager(res_mgr)
        else:
            res = obj._constructor(res_mgr)

        if orig_scalar:
            # Avoid expensive MultiIndex construction
            return self._wrap_aggregated_output(res)
        return self._wrap_aggregated_output(res, qs=qs)

    @final
    @Substitution(name="groupby")
    def ngroup(self, ascending: bool = True):
        """
        Number each group from 0 to the number of groups - 1.

        This is the enumerative complement of cumcount. Note that the
        numbers given to the groups match the order in which the groups
        would be seen when iterating over the groupby object, not the
        order they are first observed.

        Parameters
        ----------
        ascending : bool, default True
            If False, number in reverse, from number of groups - 1 to 0.

        Returns
        -------
        Series
            Unique numbers for each group.

        See Also
        --------
        .cumcount : Number the rows in each group.

        Examples
        --------
        >>> df = pd.DataFrame({"A": list("aaabba")})
        >>> df
           A
        0  a
        1  a
        2  a
        3  b
        4  b
        5  a
        >>> df.groupby('A').ngroup()
        0    0
        1    0
        2    0
        3    1
        4    1
        5    0
        dtype: int64
        >>> df.groupby('A').ngroup(ascending=False)
        0    1
        1    1
        2    1
        3    0
        4    0
        5    1
        dtype: int64
        >>> df.groupby(["A", [1, 1, 2, 3, 2, 1]]).ngroup()
        0    0
        1    0
        2    1
        3    3
        4    2
        5    0
        dtype: int64
        """
        with self._group_selection_context():
            index = self._selected_obj.index
            result = self._obj_1d_constructor(
                self.grouper.group_info[0], index, dtype=np.int64
            )
            if not ascending:
                result = self.ngroups - 1 - result
            return result

    @final
    @Substitution(name="groupby")
    def cumcount(self, ascending: bool = True):
        """
        Number each item in each group from 0 to the length of that group - 1.

        Essentially this is equivalent to

        .. code-block:: python

            self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))

        Parameters
        ----------
        ascending : bool, default True
            If False, number in reverse, from length of group - 1 to 0.

        Returns
        -------
        Series
            Sequence number of each element within each group.

        See Also
        --------
        .ngroup : Number the groups themselves.

        Examples
        --------
        >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']],
        ...                   columns=['A'])
        >>> df
           A
        0  a
        1  a
        2  a
        3  b
        4  b
        5  a
        >>> df.groupby('A').cumcount()
        0    0
        1    1
        2    2
        3    0
        4    1
        5    3
        dtype: int64
        >>> df.groupby('A').cumcount(ascending=False)
        0    3
        1    2
        2    1
        3    1
        4    0
        5    0
        dtype: int64
        """
        with self._group_selection_context():
            index = self._selected_obj._get_axis(self.axis)
            cumcounts = self._cumcount_array(ascending=ascending)
            return self._obj_1d_constructor(cumcounts, index)

    @final
    @Substitution(name="groupby")
    @Substitution(see_also=_common_see_also)
    def rank(
        self,
        method: str = "average",
        ascending: bool = True,
        na_option: str = "keep",
        pct: bool = False,
        axis: int = 0,
    ):
        """
        Provide the rank of values within each group.

        Parameters
        ----------
        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
            * average: average rank of group.
            * min: lowest rank in group.
            * max: highest rank in group.
            * first: ranks assigned in order they appear in the array.
            * dense: like 'min', but rank always increases by 1 between groups.
        ascending : bool, default True
            False for ranks by high (1) to low (N).
        na_option : {'keep', 'top', 'bottom'}, default 'keep'
            * keep: leave NA values where they are.
            * top: smallest rank if ascending.
            * bottom: smallest rank if descending.
        pct : bool, default False
            Compute percentage rank of data within each group.
        axis : int, default 0
            The axis of the object over which to compute the rank.

        Returns
        -------
        DataFrame with ranking of values within each group.
        %(see_also)s
        Examples
        --------
        >>> df = pd.DataFrame(
        ...     {
        ...         "group": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"],
        ...         "value": [2, 4, 2, 3, 5, 1, 2, 4, 1, 5],
        ...     }
        ... )
        >>> df
          group  value
        0     a      2
        1     a      4
        2     a      2
        3     a      3
        4     a      5
        5     b      1
        6     b      2
        7     b      4
        8     b      1
        9     b      5
        >>> for method in ['average', 'min', 'max', 'dense', 'first']:
        ...     df[f'{method}_rank'] = df.groupby('group')['value'].rank(method)
        >>> df
          group  value  average_rank  min_rank  max_rank  dense_rank  first_rank
        0     a      2           1.5       1.0       2.0         1.0         1.0
        1     a      4           4.0       4.0       4.0         3.0         4.0
        2     a      2           1.5       1.0       2.0         1.0         2.0
        3     a      3           3.0       3.0       3.0         2.0         3.0
        4     a      5           5.0       5.0       5.0         4.0         5.0
        5     b      1           1.5       1.0       2.0         1.0         1.0
        6     b      2           3.0       3.0       3.0         2.0         3.0
        7     b      4           4.0       4.0       4.0         3.0         4.0
        8     b      1           1.5       1.0       2.0         1.0         2.0
        9     b      5           5.0       5.0       5.0         4.0         5.0
        """
        if na_option not in {"keep", "top", "bottom"}:
            msg = "na_option must be one of 'keep', 'top', or 'bottom'"
            raise ValueError(msg)

        kwargs = {
            "ties_method": method,
            "ascending": ascending,
            "na_option": na_option,
            "pct": pct,
        }
        if axis != 0:
            # DataFrame uses different keyword name
            kwargs["method"] = kwargs.pop("ties_method")
            return self.apply(lambda x: x.rank(axis=axis, numeric_only=False, **kwargs))

        return self._cython_transform(
            "rank",
            numeric_only=False,
            axis=axis,
            **kwargs,
        )

    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def cumprod(self, axis=0, *args, **kwargs):
        """
        Cumulative product for each group.

        Returns
        -------
        Series or DataFrame
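
        Examples
        --------
        A minimal sketch of per-group cumulative products (illustrative
        data; the running product restarts at each new group):

        >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})
        >>> df.groupby("key").cumprod()
           val
        0    1
        1    2
        2    3
        3   12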
        """
 | 
						||
        nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"])
 | 
						||
        if axis != 0:
 | 
						||
            return self.apply(lambda x: x.cumprod(axis=axis, **kwargs))
 | 
						||
 | 
						||
        return self._cython_transform("cumprod", **kwargs)
 | 
						||
 | 
						||
    @final
 | 
						||
    @Substitution(name="groupby")
 | 
						||
    @Appender(_common_see_also)
 | 
						||
    def cumsum(self, axis=0, *args, **kwargs):
 | 
						||
        """
 | 
						||
        Cumulative sum for each group.
 | 
						||
 | 
						||
        Returns
 | 
						||
        -------
 | 
						||
        Series or DataFrame
 | 
						||
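
        Examples
        --------
        A minimal sketch of per-group cumulative sums (illustrative data;
        the running total restarts at each new group):

        >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})
        >>> df.groupby("key").cumsum()
           val
        0    1
        1    3
        2    3
        3    7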
        """
 | 
						||
        nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"])
 | 
						||
        if axis != 0:
 | 
						||
            return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))
 | 
						||
 | 
						||
        return self._cython_transform("cumsum", **kwargs)
 | 
						||
 | 
						||
    @final
 | 
						||
    @Substitution(name="groupby")
 | 
						||
    @Appender(_common_see_also)
 | 
						||
    def cummin(self, axis=0, **kwargs):
 | 
						||
        """
 | 
						||
        Cumulative min for each group.
 | 
						||
 | 
						||
        Returns
 | 
						||
        -------
 | 
						||
        Series or DataFrame
 | 
						||
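
        Examples
        --------
        A minimal sketch of per-group cumulative minima (illustrative data):

        >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [3, 1, 4, 2]})
        >>> df.groupby("key").cummin()
           val
        0    3
        1    1
        2    4
        3    2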
        """
 | 
						||
        skipna = kwargs.get("skipna", True)
 | 
						||
        if axis != 0:
 | 
						||
            return self.apply(lambda x: np.minimum.accumulate(x, axis))
 | 
						||
 | 
						||
        return self._cython_transform("cummin", numeric_only=False, skipna=skipna)
 | 
						||
 | 
						||
    @final
 | 
						||
    @Substitution(name="groupby")
 | 
						||
    @Appender(_common_see_also)
 | 
						||
    def cummax(self, axis=0, **kwargs):
 | 
						||
        """
 | 
						||
        Cumulative max for each group.
 | 
						||
 | 
						||
        Returns
 | 
						||
        -------
 | 
						||
        Series or DataFrame
 | 
						||
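
        Examples
        --------
        A minimal sketch of per-group cumulative maxima (illustrative data):

        >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 3, 2, 4]})
        >>> df.groupby("key").cummax()
           val
        0    1
        1    3
        2    2
        3    4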
        """
 | 
						||
        skipna = kwargs.get("skipna", True)
 | 
						||
        if axis != 0:
 | 
						||
            return self.apply(lambda x: np.maximum.accumulate(x, axis))
 | 
						||
 | 
						||
        return self._cython_transform("cummax", numeric_only=False, skipna=skipna)
 | 
						||
 | 
						||
    @final
    def _get_cythonized_result(
        self,
        base_func: Callable,
        cython_dtype: np.dtype,
        numeric_only: bool | lib.NoDefault = lib.no_default,
        needs_counts: bool = False,
        needs_nullable: bool = False,
        needs_mask: bool = False,
        pre_processing=None,
        post_processing=None,
        **kwargs,
    ):
        """
        Get result for Cythonized functions.

        Parameters
        ----------
        base_func : callable, Cythonized function to be called
        cython_dtype : np.dtype
            Type of the array that will be modified by the Cython call.
        numeric_only : bool or lib.no_default, default lib.no_default
            Whether only numeric datatypes should be computed.
        needs_counts : bool, default False
            Whether the counts should be a part of the Cython call
        needs_mask : bool, default False
            Whether boolean mask needs to be part of the Cython call
            signature
        needs_nullable : bool, default False
            Whether a bool specifying if the input is nullable is part
            of the Cython call signature
        pre_processing : function, default None
            Function to be applied to `values` prior to passing to Cython.
            Function should return a tuple where the first element is the
            values to be passed to Cython and the second element is an optional
            type which the values should be converted to after being returned
            by the Cython operation. This function is also responsible for
            raising a TypeError if the values have an invalid type.
        post_processing : function, default None
            Function to be applied to result of Cython function. Should accept
            an array of values as the first argument and type inferences as its
            second argument, i.e. the signature should be
            (ndarray, Type). If `needs_nullable=True`, a third argument should be
            `nullable`, to allow for processing specific to nullable values.
        **kwargs : dict
            Extra arguments to be passed back to Cython funcs

        Returns
        -------
        `Series` or `DataFrame` with the result of the Cythonized operation.
        """
        numeric_only = self._resolve_numeric_only(numeric_only)

        if post_processing and not callable(post_processing):
            raise ValueError("'post_processing' must be a callable!")
        if pre_processing and not callable(pre_processing):
            raise ValueError("'pre_processing' must be a callable!")

        grouper = self.grouper

        ids, _, ngroups = grouper.group_info

        how = base_func.__name__
        base_func = partial(base_func, labels=ids)

        def blk_func(values: ArrayLike) -> ArrayLike:
            values = values.T
            ncols = 1 if values.ndim == 1 else values.shape[1]

            result: ArrayLike
            result = np.zeros(ngroups * ncols, dtype=cython_dtype)
            result = result.reshape((ngroups, ncols))

            func = partial(base_func, out=result)

            inferences = None

            if needs_counts:
                counts = np.zeros(self.ngroups, dtype=np.int64)
                func = partial(func, counts=counts)

            vals = values
            if pre_processing:
                vals, inferences = pre_processing(vals)

            vals = vals.astype(cython_dtype, copy=False)
            if vals.ndim == 1:
                vals = vals.reshape((-1, 1))
            func = partial(func, values=vals)

            if needs_mask:
                mask = isna(values).view(np.uint8)
                if mask.ndim == 1:
                    mask = mask.reshape(-1, 1)
                func = partial(func, mask=mask)

            if needs_nullable:
                is_nullable = isinstance(values, BaseMaskedArray)
                func = partial(func, nullable=is_nullable)

            func(**kwargs)  # Call func to modify result in place

            if values.ndim == 1:
                assert result.shape[1] == 1, result.shape
                result = result[:, 0]

            if post_processing:
                pp_kwargs = {}
                if needs_nullable:
                    pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)

                result = post_processing(result, inferences, **pp_kwargs)

            return result.T

        obj = self._obj_with_exclusions

        # Operate block-wise instead of column-by-column
        is_ser = obj.ndim == 1
        mgr = self._get_data_to_aggregate()

        if numeric_only:
            mgr = mgr.get_numeric_data()

        res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)

        if not is_ser and len(res_mgr.items) != len(mgr.items):
            howstr = how.replace("group_", "")
            warn_dropping_nuisance_columns_deprecated(type(self), howstr)

            if len(res_mgr.items) == 0:
                # We re-call grouped_reduce to get the right exception message
                mgr.grouped_reduce(blk_func, ignore_failures=False)
                # grouped_reduce _should_ raise, so this should not be reached
                raise TypeError(  # pragma: no cover
                    "All columns were dropped in grouped_reduce"
                )

        if is_ser:
            out = self._wrap_agged_manager(res_mgr)
        else:
            out = obj._constructor(res_mgr)

        return self._wrap_aggregated_output(out)

    @final
    @Substitution(name="groupby")
    def shift(self, periods=1, freq=None, axis=0, fill_value=None):
        """
        Shift each group by periods observations.

        If freq is passed, the index will be increased using the periods and the freq.

        Parameters
        ----------
        periods : int, default 1
            Number of periods to shift.
        freq : str, optional
            Frequency string.
        axis : axis to shift, default 0
            Shift direction.
        fill_value : optional
            The scalar value to use for newly introduced missing values.

        Returns
        -------
        Series or DataFrame
            Object shifted within each group.

        See Also
        --------
        Index.shift : Shift values of Index.
        tshift : Shift the time index, using the index's frequency
            if available.
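
        Examples
        --------
        A minimal sketch of shifting values within each group (illustrative
        data; the leading row of each group becomes ``NaN``):

        >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 3, 4]})
        >>> df.groupby("key").shift(1)
           val
        0  NaN
        1  1.0
        2  NaN
        3  3.0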
        """
 | 
						||
        if freq is not None or axis != 0:
 | 
						||
            return self.apply(lambda x: x.shift(periods, freq, axis, fill_value))
 | 
						||
 | 
						||
        ids, _, ngroups = self.grouper.group_info
 | 
						||
        res_indexer = np.zeros(len(ids), dtype=np.int64)
 | 
						||
 | 
						||
        libgroupby.group_shift_indexer(res_indexer, ids, ngroups, periods)
 | 
						||
 | 
						||
        obj = self._obj_with_exclusions
 | 
						||
 | 
						||
        res = obj._reindex_with_indexers(
 | 
						||
            {self.axis: (obj.axes[self.axis], res_indexer)},
 | 
						||
            fill_value=fill_value,
 | 
						||
            allow_dups=True,
 | 
						||
        )
 | 
						||
        return res
 | 
						||
 | 
						||
    @final
    @Substitution(name="groupby")
    @Appender(_common_see_also)
    def pct_change(self, periods=1, fill_method="ffill", limit=None, freq=None, axis=0):
        """
        Calculate pct_change of each value to previous entry in group.

        Returns
        -------
        Series or DataFrame
            Percentage changes within each group.
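
        Examples
        --------
        A minimal sketch of per-group percentage change (illustrative data;
        the first entry of each group has no previous value and is ``NaN``):

        >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 4, 8]})
        >>> df.groupby("key")["val"].pct_change()
        0    NaN
        1    1.0
        2    NaN
        3    1.0
        Name: val, dtype: float64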
        """
 | 
						||
        # TODO(GH#23918): Remove this conditional for SeriesGroupBy when
 | 
						||
        #  GH#23918 is fixed
 | 
						||
        if freq is not None or axis != 0:
 | 
						||
            return self.apply(
 | 
						||
                lambda x: x.pct_change(
 | 
						||
                    periods=periods,
 | 
						||
                    fill_method=fill_method,
 | 
						||
                    limit=limit,
 | 
						||
                    freq=freq,
 | 
						||
                    axis=axis,
 | 
						||
                )
 | 
						||
            )
 | 
						||
        if fill_method is None:  # GH30463
 | 
						||
            fill_method = "ffill"
 | 
						||
            limit = 0
 | 
						||
        filled = getattr(self, fill_method)(limit=limit)
 | 
						||
        fill_grp = filled.groupby(self.grouper.codes, axis=self.axis)
 | 
						||
        shifted = fill_grp.shift(periods=periods, freq=freq, axis=self.axis)
 | 
						||
        return (filled / shifted) - 1
 | 
						||
 | 
						||
    @final
    @Substitution(name="groupby")
    @Substitution(see_also=_common_see_also)
    def head(self, n=5):
        """
        Return first n rows of each group.

        Similar to ``.apply(lambda x: x.head(n))``, but it returns a subset of rows
        from the original DataFrame with original index and order preserved
        (``as_index`` flag is ignored).

        Parameters
        ----------
        n : int
            If positive: number of entries to include from start of each group.
            If negative: number of entries to exclude from end of each group.

        Returns
        -------
        Series or DataFrame
            Subset of original Series or DataFrame as determined by n.
        %(see_also)s
        Examples
        --------

        >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]],
        ...                   columns=['A', 'B'])
        >>> df.groupby('A').head(1)
           A  B
        0  1  2
        2  5  6
        >>> df.groupby('A').head(-1)
           A  B
        0  1  2
        """
        self._reset_group_selection()
        mask = self._make_mask_from_positional_indexer(slice(None, n))
        return self._mask_selected_obj(mask)

    @final
    @Substitution(name="groupby")
    @Substitution(see_also=_common_see_also)
    def tail(self, n=5):
        """
        Return last n rows of each group.

        Similar to ``.apply(lambda x: x.tail(n))``, but it returns a subset of rows
        from the original DataFrame with original index and order preserved
        (``as_index`` flag is ignored).

        Parameters
        ----------
        n : int
            If positive: number of entries to include from end of each group.
            If negative: number of entries to exclude from start of each group.

        Returns
        -------
        Series or DataFrame
            Subset of original Series or DataFrame as determined by n.
        %(see_also)s
        Examples
        --------

        >>> df = pd.DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]],
        ...                   columns=['A', 'B'])
        >>> df.groupby('A').tail(1)
           A  B
        1  a  2
        3  b  2
        >>> df.groupby('A').tail(-1)
           A  B
        1  a  2
        3  b  2
        """
        self._reset_group_selection()
        if n:
            mask = self._make_mask_from_positional_indexer(slice(-n, None))
        else:
            mask = self._make_mask_from_positional_indexer([])

        return self._mask_selected_obj(mask)

    @final
    def _mask_selected_obj(self, mask: np.ndarray) -> NDFrameT:
        """
        Return _selected_obj with mask applied to the correct axis.

        Parameters
        ----------
        mask : np.ndarray
            Boolean mask to apply.

        Returns
        -------
        Series or DataFrame
            Filtered _selected_obj.
        """
        ids = self.grouper.group_info[0]
        mask = mask & (ids != -1)

        if self.axis == 0:
            return self._selected_obj[mask]
        else:
            return self._selected_obj.iloc[:, mask]

    @final
    def _reindex_output(
        self,
        output: OutputFrameOrSeries,
        fill_value: Scalar = np.NaN,
        qs: npt.NDArray[np.float64] | None = None,
    ) -> OutputFrameOrSeries:
        """
        If we have categorical groupers, then we might want to make sure that
        we have a fully re-indexed output to the levels. This means expanding
        the output space to accommodate all values in the cartesian product of
        our groups, regardless of whether they were observed in the data or
        not. This will expand the output space if there are missing groups.

        The method returns early without modifying the input if the number of
        groupings is less than 2, self.observed == True or none of the groupers
        are categorical.

        Parameters
        ----------
        output : Series or DataFrame
            Object resulting from grouping and applying an operation.
        fill_value : scalar, default np.NaN
            Value to use for unobserved categories if self.observed is False.
        qs : np.ndarray[float64] or None, default None
            quantile values, only relevant for quantile.

        Returns
        -------
        Series or DataFrame
            Object (potentially) re-indexed to include all possible groups.
        """
        groupings = self.grouper.groupings
        if len(groupings) == 1:
            return output

        # if we only care about the observed values
        # we are done
        elif self.observed:
            return output

        # reindexing only applies to a Categorical grouper
        elif not any(
            isinstance(ping.grouping_vector, (Categorical, CategoricalIndex))
            for ping in groupings
        ):
            return output

        levels_list = [ping.group_index for ping in groupings]
        names = self.grouper.names
        if qs is not None:
            # error: Argument 1 to "append" of "list" has incompatible type
            # "ndarray[Any, dtype[floating[_64Bit]]]"; expected "Index"
            levels_list.append(qs)  # type: ignore[arg-type]
            names = names + [None]
        index, _ = MultiIndex.from_product(levels_list, names=names).sortlevel()

        if self.as_index:
            d = {
                self.obj._get_axis_name(self.axis): index,
                "copy": False,
                "fill_value": fill_value,
            }
            return output.reindex(**d)

        # GH 13204
        # Here, the categorical in-axis groupers, which need to be fully
        # expanded, are columns in `output`. An idea is to do:
        # output = output.set_index(self.grouper.names)
        #                .reindex(index).reset_index()
        # but special care has to be taken because of possible not-in-axis
        # groupers.
        # So, we manually select and drop the in-axis grouper columns,
        # reindex `output`, and then reset the in-axis grouper columns.

        # Select in-axis groupers
        in_axis_grps = (
            (i, ping.name) for (i, ping) in enumerate(groupings) if ping.in_axis
        )
        g_nums, g_names = zip(*in_axis_grps)

        output = output.drop(labels=list(g_names), axis=1)

        # Set a temp index and reindex (possibly expanding)
        output = output.set_index(self.grouper.result_index).reindex(
            index, copy=False, fill_value=fill_value
        )

        # Reset in-axis grouper columns
        # (using level numbers `g_nums` because level names may not be unique)
        output = output.reset_index(level=g_nums)

        return output.reset_index(drop=True)

    @final
 | 
						||
    def sample(
 | 
						||
        self,
 | 
						||
        n: int | None = None,
 | 
						||
        frac: float | None = None,
 | 
						||
        replace: bool = False,
 | 
						||
        weights: Sequence | Series | None = None,
 | 
						||
        random_state: RandomState | None = None,
 | 
						||
    ):
 | 
						||
        """
 | 
						||
        Return a random sample of items from each group.
 | 
						||
 | 
						||
        You can use `random_state` for reproducibility.
 | 
						||
 | 
						||
        .. versionadded:: 1.1.0
 | 
						||
 | 
						||
        Parameters
 | 
						||
        ----------
 | 
						||
        n : int, optional
 | 
						||
            Number of items to return for each group. Cannot be used with
 | 
						||
            `frac` and must be no larger than the smallest group unless
 | 
						||
            `replace` is True. Default is one if `frac` is None.
 | 
						||
        frac : float, optional
 | 
						||
            Fraction of items to return. Cannot be used with `n`.
 | 
						||
        replace : bool, default False
 | 
						||
            Allow or disallow sampling of the same row more than once.
 | 
						||
        weights : list-like, optional
 | 
						||
            Default None results in equal probability weighting.
 | 
						||
            If passed a list-like then values must have the same length as
 | 
						||
            the underlying DataFrame or Series object and will be used as
 | 
						||
            sampling probabilities after normalization within each group.
 | 
						||
            Values must be non-negative with at least one positive element
 | 
						||
            within each group.
 | 
						||
        random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional
 | 
						||
            If int, array-like, or BitGenerator, seed for random number generator.
 | 
						||
            If np.random.RandomState or np.random.Generator, use as given.
 | 
						||
 | 
						||
            .. versionchanged:: 1.4.0
 | 
						||
 | 
						||
                np.random.Generator objects now accepted
 | 
						||
 | 
						||
        Returns
 | 
						||
        -------
 | 
						||
        Series or DataFrame
 | 
						||
            A new object of same type as caller containing items randomly
 | 
						||
            sampled within each group from the caller object.
 | 
						||
 | 
						||
        See Also
 | 
						||
        --------
 | 
						||
        DataFrame.sample: Generate random samples from a DataFrame object.
 | 
						||
        numpy.random.choice: Generate a random sample from a given 1-D numpy
 | 
						||
            array.
 | 
						||
 | 
						||
        Examples
 | 
						||
        --------
 | 
						||
        >>> df = pd.DataFrame(
 | 
						||
        ...     {"a": ["red"] * 2 + ["blue"] * 2 + ["black"] * 2, "b": range(6)}
 | 
						||
        ... )
 | 
						||
        >>> df
 | 
						||
               a  b
 | 
						||
        0    red  0
 | 
						||
        1    red  1
 | 
						||
        2   blue  2
 | 
						||
        3   blue  3
 | 
						||
        4  black  4
 | 
						||
        5  black  5
 | 
						||
 | 
						||
        Select one row at random for each distinct value in column a. The
 | 
						||
        `random_state` argument can be used to guarantee reproducibility:
 | 
						||
 | 
						||
        >>> df.groupby("a").sample(n=1, random_state=1)
 | 
						||
               a  b
 | 
						||
        4  black  4
 | 
						||
        2   blue  2
 | 
						||
        1    red  1
 | 
						||
 | 
						||
        Set `frac` to sample fixed proportions rather than counts:
 | 
						||
 | 
						||
        >>> df.groupby("a")["b"].sample(frac=0.5, random_state=2)
 | 
						||
        5    5
 | 
						||
        2    2
 | 
						||
        0    0
 | 
						||
        Name: b, dtype: int64
 | 
						||
 | 
						||
        Control sample probabilities within groups by setting weights:
 | 
						||
 | 
						||
        >>> df.groupby("a").sample(
 | 
						||
        ...     n=1,
 | 
						||
        ...     weights=[1, 1, 1, 0, 0, 1],
 | 
						||
        ...     random_state=1,
 | 
						||
        ... )
 | 
						||
               a  b
 | 
						||
        5  black  5
 | 
						||
        2   blue  2
 | 
						||
        0    red  0
 | 
						||
        """  # noqa:E501
 | 
						||
        size = sample.process_sampling_size(n, frac, replace)
        if weights is not None:
            weights_arr = sample.preprocess_weights(
                self._selected_obj, weights, axis=self.axis
            )

        random_state = com.random_state(random_state)

        group_iterator = self.grouper.get_iterator(self._selected_obj, self.axis)

        sampled_indices = []
        for labels, obj in group_iterator:
            # Positions of this group's rows within the original object.
            grp_indices = self.indices[labels]
            group_size = len(grp_indices)
            if size is not None:
                sample_size = size
            else:
                assert frac is not None
                sample_size = round(frac * group_size)

            # Sample positions *within* the group, then map them back to
            # positions in the original object via `grp_indices`.
            grp_sample = sample.sample(
                group_size,
                size=sample_size,
                replace=replace,
                weights=None if weights is None else weights_arr[grp_indices],
                random_state=random_state,
            )
            sampled_indices.append(grp_indices[grp_sample])

        sampled_indices = np.concatenate(sampled_indices)
        return self._selected_obj.take(sampled_indices, axis=self.axis)


@doc(GroupBy)
def get_groupby(
    obj: NDFrame,
    by: _KeysArgType | None = None,
    axis: int = 0,
    level=None,
    grouper: ops.BaseGrouper | None = None,
    exclusions=None,
    selection=None,
    as_index: bool = True,
    sort: bool = True,
    group_keys: bool = True,
    squeeze: bool = False,
    observed: bool = False,
    mutated: bool = False,
    dropna: bool = True,
) -> GroupBy:

    klass: type[GroupBy]
    if isinstance(obj, Series):
        from pandas.core.groupby.generic import SeriesGroupBy

        klass = SeriesGroupBy
    elif isinstance(obj, DataFrame):
        from pandas.core.groupby.generic import DataFrameGroupBy

        klass = DataFrameGroupBy
    else:  # pragma: no cover
        raise TypeError(f"invalid type: {obj}")

    return klass(
        obj=obj,
        keys=by,
        axis=axis,
        level=level,
        grouper=grouper,
        exclusions=exclusions,
        selection=selection,
        as_index=as_index,
        sort=sort,
        group_keys=group_keys,
        squeeze=squeeze,
        observed=observed,
        mutated=mutated,
        dropna=dropna,
    )
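
# Illustrative usage (sketch): the factory simply dispatches on the type of
# `obj`, e.g.
#
#   df = pd.DataFrame({"a": [1, 1, 2], "b": [10, 20, 30]})
#   gb = get_groupby(df, by="a")  # -> DataFrameGroupBy
#   gb.sum()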


def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiIndex:
    """
    Insert the sequence 'qs' of quantiles as the inner-most level of a MultiIndex.

    The quantile level in the MultiIndex is a repeated copy of 'qs'.

    Parameters
    ----------
    idx : Index
    qs : np.ndarray[float64]

    Returns
    -------
    MultiIndex
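
    Examples
    --------
    A minimal sketch of the flat-index case (output shown for illustration):

    >>> _insert_quantile_level(pd.Index(["x", "y"]), np.array([0.25, 0.75]))
    MultiIndex([('x', 0.25),
                ('x', 0.75),
                ('y', 0.25),
                ('y', 0.75)],
               )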
    """
    nqs = len(qs)

    if idx._is_multi:
        idx = cast(MultiIndex, idx)
        lev_codes, lev = Index(qs).factorize()
        levels = list(idx.levels) + [lev]
        codes = [np.repeat(x, nqs) for x in idx.codes] + [np.tile(lev_codes, len(idx))]
        mi = MultiIndex(levels=levels, codes=codes, names=idx.names + [None])
    else:
        mi = MultiIndex.from_product([idx, qs])
    return mi


def warn_dropping_nuisance_columns_deprecated(cls, how: str) -> None:
    warnings.warn(
        "Dropping invalid columns in "
        f"{cls.__name__}.{how} is deprecated. "
        "In a future version, a TypeError will be raised. "
        f"Before calling .{how}, select only columns which "
        "should be valid for the function.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
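
# Illustrative trigger (sketch; the exact set of methods that reach this
# warning varies by pandas version): a group-wise op that cannot handle some
# columns drops them with this FutureWarning instead of raising, e.g.
#
#   df = pd.DataFrame({"g": [1, 1, 2], "s": ["x", "y", "z"]})
#   df.groupby("g").cumprod()  # -> "Dropping invalid columns in
#                              #     DataFrameGroupBy.cumprod is deprecated..."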