1407 lines
		
	
	
		
			45 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1407 lines
		
	
	
		
			45 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
 | 
						|
test .agg behavior / note that .apply is tested generally in test_groupby.py
 | 
						|
"""
 | 
						|
import datetime
 | 
						|
import functools
 | 
						|
from functools import partial
 | 
						|
import re
 | 
						|
 | 
						|
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas.core.dtypes.common import is_integer_dtype
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
from pandas import (
 | 
						|
    DataFrame,
 | 
						|
    Index,
 | 
						|
    MultiIndex,
 | 
						|
    Series,
 | 
						|
    concat,
 | 
						|
    to_datetime,
 | 
						|
)
 | 
						|
import pandas._testing as tm
 | 
						|
from pandas.core.base import SpecificationError
 | 
						|
from pandas.core.groupby.grouper import Grouping
 | 
						|
 | 
						|
 | 
						|
def test_groupby_agg_no_extra_calls():
 | 
						|
    # GH#31760
 | 
						|
    df = DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]})
 | 
						|
    gb = df.groupby("key")["value"]
 | 
						|
 | 
						|
    def dummy_func(x):
 | 
						|
        assert len(x) != 0
 | 
						|
        return x.sum()
 | 
						|
 | 
						|
    gb.agg(dummy_func)
 | 
						|
 | 
						|
 | 
						|
def test_agg_regression1(tsframe):
 | 
						|
    grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
 | 
						|
    result = grouped.agg(np.mean)
 | 
						|
    expected = grouped.mean()
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_must_agg(df):
 | 
						|
    grouped = df.groupby("A")["C"]
 | 
						|
 | 
						|
    msg = "Must produce aggregated value"
 | 
						|
    with pytest.raises(Exception, match=msg):
 | 
						|
        grouped.agg(lambda x: x.describe())
 | 
						|
    with pytest.raises(Exception, match=msg):
 | 
						|
        grouped.agg(lambda x: x.index[:2])
 | 
						|
 | 
						|
 | 
						|
def test_agg_ser_multi_key(df):
 | 
						|
 | 
						|
    f = lambda x: x.sum()
 | 
						|
    results = df.C.groupby([df.A, df.B]).aggregate(f)
 | 
						|
    expected = df.groupby(["A", "B"]).sum()["C"]
 | 
						|
    tm.assert_series_equal(results, expected)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_aggregation_mixed_dtype():
 | 
						|
    # GH 6212
 | 
						|
    expected = DataFrame(
 | 
						|
        {
 | 
						|
            "v1": [5, 5, 7, np.nan, 3, 3, 4, 1],
 | 
						|
            "v2": [55, 55, 77, np.nan, 33, 33, 44, 11],
 | 
						|
        },
 | 
						|
        index=MultiIndex.from_tuples(
 | 
						|
            [
 | 
						|
                (1, 95),
 | 
						|
                (1, 99),
 | 
						|
                (2, 95),
 | 
						|
                (2, 99),
 | 
						|
                ("big", "damp"),
 | 
						|
                ("blue", "dry"),
 | 
						|
                ("red", "red"),
 | 
						|
                ("red", "wet"),
 | 
						|
            ],
 | 
						|
            names=["by1", "by2"],
 | 
						|
        ),
 | 
						|
    )
 | 
						|
 | 
						|
    df = DataFrame(
 | 
						|
        {
 | 
						|
            "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9],
 | 
						|
            "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99],
 | 
						|
            "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12],
 | 
						|
            "by2": [
 | 
						|
                "wet",
 | 
						|
                "dry",
 | 
						|
                99,
 | 
						|
                95,
 | 
						|
                np.nan,
 | 
						|
                "damp",
 | 
						|
                95,
 | 
						|
                99,
 | 
						|
                "red",
 | 
						|
                99,
 | 
						|
                np.nan,
 | 
						|
                np.nan,
 | 
						|
            ],
 | 
						|
        }
 | 
						|
    )
 | 
						|
 | 
						|
    g = df.groupby(["by1", "by2"])
 | 
						|
    result = g[["v1", "v2"]].mean()
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_aggregation_multi_level_column():
 | 
						|
    # GH 29772
 | 
						|
    lst = [
 | 
						|
        [True, True, True, False],
 | 
						|
        [True, False, np.nan, False],
 | 
						|
        [True, True, np.nan, False],
 | 
						|
        [True, True, np.nan, False],
 | 
						|
    ]
 | 
						|
    df = DataFrame(
 | 
						|
        data=lst,
 | 
						|
        columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]),
 | 
						|
    )
 | 
						|
 | 
						|
    gb = df.groupby(level=1, axis=1)
 | 
						|
    result = gb.sum(numeric_only=False)
 | 
						|
    expected = DataFrame({0: [2.0, True, True, True], 1: [1, 0, 1, 1]})
 | 
						|
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_apply_corner(ts, tsframe):
 | 
						|
    # nothing to group, all NA
 | 
						|
    grouped = ts.groupby(ts * np.nan)
 | 
						|
    assert ts.dtype == np.float64
 | 
						|
 | 
						|
    # groupby float64 values results in Float64Index
 | 
						|
    exp = Series([], dtype=np.float64, index=Index([], dtype=np.float64))
 | 
						|
    tm.assert_series_equal(grouped.sum(), exp)
 | 
						|
    tm.assert_series_equal(grouped.agg(np.sum), exp)
 | 
						|
    tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False)
 | 
						|
 | 
						|
    # DataFrame
 | 
						|
    grouped = tsframe.groupby(tsframe["A"] * np.nan)
 | 
						|
    exp_df = DataFrame(
 | 
						|
        columns=tsframe.columns,
 | 
						|
        dtype=float,
 | 
						|
        index=Index([], name="A", dtype=np.float64),
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(grouped.sum(), exp_df)
 | 
						|
    tm.assert_frame_equal(grouped.agg(np.sum), exp_df)
 | 
						|
    tm.assert_frame_equal(grouped.apply(np.sum), exp_df)
 | 
						|
 | 
						|
 | 
						|
def test_agg_grouping_is_list_tuple(ts):
 | 
						|
    df = tm.makeTimeDataFrame()
 | 
						|
 | 
						|
    grouped = df.groupby(lambda x: x.year)
 | 
						|
    grouper = grouped.grouper.groupings[0].grouping_vector
 | 
						|
    grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper))
 | 
						|
 | 
						|
    result = grouped.agg(np.mean)
 | 
						|
    expected = grouped.mean()
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper))
 | 
						|
 | 
						|
    result = grouped.agg(np.mean)
 | 
						|
    expected = grouped.mean()
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_python_multiindex(mframe):
 | 
						|
    grouped = mframe.groupby(["A", "B"])
 | 
						|
 | 
						|
    result = grouped.agg(np.mean)
 | 
						|
    expected = grouped.mean()
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "groupbyfunc", [lambda x: x.weekday(), [lambda x: x.month, lambda x: x.weekday()]]
 | 
						|
)
 | 
						|
def test_aggregate_str_func(tsframe, groupbyfunc):
 | 
						|
    grouped = tsframe.groupby(groupbyfunc)
 | 
						|
 | 
						|
    # single series
 | 
						|
    result = grouped["A"].agg("std")
 | 
						|
    expected = grouped["A"].std()
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    # group frame by function name
 | 
						|
    result = grouped.aggregate("var")
 | 
						|
    expected = grouped.var()
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    # group frame by function dict
 | 
						|
    result = grouped.agg({"A": "var", "B": "std", "C": "mean", "D": "sem"})
 | 
						|
    expected = DataFrame(
 | 
						|
        {
 | 
						|
            "A": grouped["A"].var(),
 | 
						|
            "B": grouped["B"].std(),
 | 
						|
            "C": grouped["C"].mean(),
 | 
						|
            "D": grouped["D"].sem(),
 | 
						|
        }
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func):
 | 
						|
    gb = df.groupby(level=0)
 | 
						|
    if reduction_func in ("idxmax", "idxmin"):
 | 
						|
        error = TypeError
 | 
						|
        msg = "reduction operation '.*' not allowed for this dtype"
 | 
						|
    else:
 | 
						|
        error = ValueError
 | 
						|
        msg = f"Operation {reduction_func} does not support axis=1"
 | 
						|
    with pytest.raises(error, match=msg):
 | 
						|
        gb.agg(reduction_func, axis=1)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "func, expected, dtype, result_dtype_dict",
 | 
						|
    [
 | 
						|
        ("sum", [5, 7, 9], "int64", {}),
 | 
						|
        ("std", [4.5**0.5] * 3, int, {"i": float, "j": float, "k": float}),
 | 
						|
        ("var", [4.5] * 3, int, {"i": float, "j": float, "k": float}),
 | 
						|
        ("sum", [5, 7, 9], "Int64", {"j": "int64"}),
 | 
						|
        ("std", [4.5**0.5] * 3, "Int64", {"i": float, "j": float, "k": float}),
 | 
						|
        ("var", [4.5] * 3, "Int64", {"i": "float64", "j": "float64", "k": "float64"}),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype_dict):
 | 
						|
    # GH#43209
 | 
						|
    df = DataFrame(
 | 
						|
        [[1, 2, 3, 4, 5, 6]] * 3,
 | 
						|
        columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]),
 | 
						|
    ).astype({("a", "j"): dtype, ("b", "j"): dtype})
 | 
						|
    result = df.groupby(level=1, axis=1).agg(func)
 | 
						|
    expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype(
 | 
						|
        result_dtype_dict
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "func, expected_data, result_dtype_dict",
 | 
						|
    [
 | 
						|
        ("sum", [[2, 4], [10, 12], [18, 20]], {10: "int64", 20: "int64"}),
 | 
						|
        # std should ideally return Int64 / Float64 #43330
 | 
						|
        ("std", [[2**0.5] * 2] * 3, "float64"),
 | 
						|
        ("var", [[2] * 2] * 3, {10: "float64", 20: "float64"}),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict):
 | 
						|
    # GH#43209
 | 
						|
    df = DataFrame(
 | 
						|
        np.arange(12).reshape(3, 4),
 | 
						|
        index=Index([0, 1, 0], name="y"),
 | 
						|
        columns=Index([10, 20, 10, 20], name="x"),
 | 
						|
        dtype="int64",
 | 
						|
    ).astype({10: "Int64"})
 | 
						|
    result = df.groupby("x", axis=1).agg(func)
 | 
						|
    expected = DataFrame(
 | 
						|
        data=expected_data,
 | 
						|
        index=Index([0, 1, 0], name="y"),
 | 
						|
        columns=Index([10, 20], name="x"),
 | 
						|
    ).astype(result_dtype_dict)
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_aggregate_item_by_item(df):
 | 
						|
    grouped = df.groupby("A")
 | 
						|
 | 
						|
    aggfun = lambda ser: ser.size
 | 
						|
    result = grouped.agg(aggfun)
 | 
						|
    foo = (df.A == "foo").sum()
 | 
						|
    bar = (df.A == "bar").sum()
 | 
						|
    K = len(result.columns)
 | 
						|
 | 
						|
    # GH5782
 | 
						|
    exp = Series(np.array([foo] * K), index=list("BCD"), name="foo")
 | 
						|
    tm.assert_series_equal(result.xs("foo"), exp)
 | 
						|
 | 
						|
    exp = Series(np.array([bar] * K), index=list("BCD"), name="bar")
 | 
						|
    tm.assert_almost_equal(result.xs("bar"), exp)
 | 
						|
 | 
						|
    def aggfun(ser):
 | 
						|
        return ser.size
 | 
						|
 | 
						|
    result = DataFrame().groupby(df.A).agg(aggfun)
 | 
						|
    assert isinstance(result, DataFrame)
 | 
						|
    assert len(result) == 0
 | 
						|
 | 
						|
 | 
						|
def test_wrap_agg_out(three_group):
 | 
						|
    grouped = three_group.groupby(["A", "B"])
 | 
						|
 | 
						|
    def func(ser):
 | 
						|
        if ser.dtype == object:
 | 
						|
            raise TypeError
 | 
						|
        else:
 | 
						|
            return ser.sum()
 | 
						|
 | 
						|
    with tm.assert_produces_warning(FutureWarning, match="Dropping invalid columns"):
 | 
						|
        result = grouped.aggregate(func)
 | 
						|
    exp_grouped = three_group.loc[:, three_group.columns != "C"]
 | 
						|
    expected = exp_grouped.groupby(["A", "B"]).aggregate(func)
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_multiple_functions_maintain_order(df):
 | 
						|
    # GH #610
 | 
						|
    funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)]
 | 
						|
    result = df.groupby("A")["C"].agg(funcs)
 | 
						|
    exp_cols = Index(["mean", "max", "min"])
 | 
						|
 | 
						|
    tm.assert_index_equal(result.columns, exp_cols)
 | 
						|
 | 
						|
 | 
						|
def test_agg_multiple_functions_same_name():
 | 
						|
    # GH 30880
 | 
						|
    df = DataFrame(
 | 
						|
        np.random.randn(1000, 3),
 | 
						|
        index=pd.date_range("1/1/2012", freq="S", periods=1000),
 | 
						|
        columns=["A", "B", "C"],
 | 
						|
    )
 | 
						|
    result = df.resample("3T").agg(
 | 
						|
        {"A": [partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]}
 | 
						|
    )
 | 
						|
    expected_index = pd.date_range("1/1/2012", freq="3T", periods=6)
 | 
						|
    expected_columns = MultiIndex.from_tuples([("A", "quantile"), ("A", "quantile")])
 | 
						|
    expected_values = np.array(
 | 
						|
        [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]]
 | 
						|
    ).T
 | 
						|
    expected = DataFrame(
 | 
						|
        expected_values, columns=expected_columns, index=expected_index
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_multiple_functions_same_name_with_ohlc_present():
 | 
						|
    # GH 30880
 | 
						|
    # ohlc expands dimensions, so different test to the above is required.
 | 
						|
    df = DataFrame(
 | 
						|
        np.random.randn(1000, 3),
 | 
						|
        index=pd.date_range("1/1/2012", freq="S", periods=1000, name="dti"),
 | 
						|
        columns=Index(["A", "B", "C"], name="alpha"),
 | 
						|
    )
 | 
						|
    result = df.resample("3T").agg(
 | 
						|
        {"A": ["ohlc", partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]}
 | 
						|
    )
 | 
						|
    expected_index = pd.date_range("1/1/2012", freq="3T", periods=6, name="dti")
 | 
						|
    expected_columns = MultiIndex.from_tuples(
 | 
						|
        [
 | 
						|
            ("A", "ohlc", "open"),
 | 
						|
            ("A", "ohlc", "high"),
 | 
						|
            ("A", "ohlc", "low"),
 | 
						|
            ("A", "ohlc", "close"),
 | 
						|
            ("A", "quantile", "A"),
 | 
						|
            ("A", "quantile", "A"),
 | 
						|
        ],
 | 
						|
        names=["alpha", None, None],
 | 
						|
    )
 | 
						|
    non_ohlc_expected_values = np.array(
 | 
						|
        [df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]]
 | 
						|
    ).T
 | 
						|
    expected_values = np.hstack([df.resample("3T").A.ohlc(), non_ohlc_expected_values])
 | 
						|
    expected = DataFrame(
 | 
						|
        expected_values, columns=expected_columns, index=expected_index
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_multiple_functions_tuples_and_non_tuples(df):
 | 
						|
    # #1359
 | 
						|
    funcs = [("foo", "mean"), "std"]
 | 
						|
    ex_funcs = [("foo", "mean"), ("std", "std")]
 | 
						|
 | 
						|
    result = df.groupby("A")["C"].agg(funcs)
 | 
						|
    expected = df.groupby("A")["C"].agg(ex_funcs)
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    with tm.assert_produces_warning(
 | 
						|
        FutureWarning, match=r"\['B'\] did not aggregate successfully"
 | 
						|
    ):
 | 
						|
        result = df.groupby("A").agg(funcs)
 | 
						|
    with tm.assert_produces_warning(
 | 
						|
        FutureWarning, match=r"\['B'\] did not aggregate successfully"
 | 
						|
    ):
 | 
						|
        expected = df.groupby("A").agg(ex_funcs)
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_more_flexible_frame_multi_function(df):
 | 
						|
    grouped = df.groupby("A")
 | 
						|
 | 
						|
    exmean = grouped.agg({"C": np.mean, "D": np.mean})
 | 
						|
    exstd = grouped.agg({"C": np.std, "D": np.std})
 | 
						|
 | 
						|
    expected = concat([exmean, exstd], keys=["mean", "std"], axis=1)
 | 
						|
    expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1)
 | 
						|
 | 
						|
    d = {"C": [np.mean, np.std], "D": [np.mean, np.std]}
 | 
						|
    result = grouped.aggregate(d)
 | 
						|
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    # be careful
 | 
						|
    result = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]})
 | 
						|
    expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]})
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def foo(x):
 | 
						|
        return np.mean(x)
 | 
						|
 | 
						|
    def bar(x):
 | 
						|
        return np.std(x, ddof=1)
 | 
						|
 | 
						|
    # this uses column selection & renaming
 | 
						|
    msg = r"nested renamer is not supported"
 | 
						|
    with pytest.raises(SpecificationError, match=msg):
 | 
						|
        d = {"C": np.mean, "D": {"foo": np.mean, "bar": np.std}}
 | 
						|
        grouped.aggregate(d)
 | 
						|
 | 
						|
    # But without renaming, these functions are OK
 | 
						|
    d = {"C": [np.mean], "D": [foo, bar]}
 | 
						|
    grouped.aggregate(d)
 | 
						|
 | 
						|
 | 
						|
def test_multi_function_flexible_mix(df):
 | 
						|
    # GH #1268
 | 
						|
    grouped = df.groupby("A")
 | 
						|
 | 
						|
    # Expected
 | 
						|
    d = {"C": {"foo": "mean", "bar": "std"}, "D": {"sum": "sum"}}
 | 
						|
    # this uses column selection & renaming
 | 
						|
    msg = r"nested renamer is not supported"
 | 
						|
    with pytest.raises(SpecificationError, match=msg):
 | 
						|
        grouped.aggregate(d)
 | 
						|
 | 
						|
    # Test 1
 | 
						|
    d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"}
 | 
						|
    # this uses column selection & renaming
 | 
						|
    with pytest.raises(SpecificationError, match=msg):
 | 
						|
        grouped.aggregate(d)
 | 
						|
 | 
						|
    # Test 2
 | 
						|
    d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"}
 | 
						|
    # this uses column selection & renaming
 | 
						|
    with pytest.raises(SpecificationError, match=msg):
 | 
						|
        grouped.aggregate(d)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_agg_coercing_bools():
 | 
						|
    # issue 14873
 | 
						|
    dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]})
 | 
						|
    gp = dat.groupby("a")
 | 
						|
 | 
						|
    index = Index([1, 2], name="a")
 | 
						|
 | 
						|
    result = gp["b"].aggregate(lambda x: (x != 0).all())
 | 
						|
    expected = Series([False, True], index=index, name="b")
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    result = gp["c"].aggregate(lambda x: x.isnull().all())
 | 
						|
    expected = Series([True, False], index=index, name="c")
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "op",
 | 
						|
    [
 | 
						|
        lambda x: x.sum(),
 | 
						|
        lambda x: x.cumsum(),
 | 
						|
        lambda x: x.transform("sum"),
 | 
						|
        lambda x: x.transform("cumsum"),
 | 
						|
        lambda x: x.agg("sum"),
 | 
						|
        lambda x: x.agg("cumsum"),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_bool_agg_dtype(op):
 | 
						|
    # GH 7001
 | 
						|
    # Bool sum aggregations result in int
 | 
						|
    df = DataFrame({"a": [1, 1], "b": [False, True]})
 | 
						|
    s = df.set_index("a")["b"]
 | 
						|
 | 
						|
    result = op(df.groupby("a"))["b"].dtype
 | 
						|
    assert is_integer_dtype(result)
 | 
						|
 | 
						|
    result = op(s.groupby("a")).dtype
 | 
						|
    assert is_integer_dtype(result)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "keys, agg_index",
 | 
						|
    [
 | 
						|
        (["a"], Index([1], name="a")),
 | 
						|
        (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])),
 | 
						|
    ],
 | 
						|
)
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "input_dtype", ["bool", "int32", "int64", "float32", "float64"]
 | 
						|
)
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "result_dtype", ["bool", "int32", "int64", "float32", "float64"]
 | 
						|
)
 | 
						|
@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"])
 | 
						|
def test_callable_result_dtype_frame(
 | 
						|
    keys, agg_index, input_dtype, result_dtype, method
 | 
						|
):
 | 
						|
    # GH 21240
 | 
						|
    df = DataFrame({"a": [1], "b": [2], "c": [True]})
 | 
						|
    df["c"] = df["c"].astype(input_dtype)
 | 
						|
    op = getattr(df.groupby(keys)[["c"]], method)
 | 
						|
    result = op(lambda x: x.astype(result_dtype).iloc[0])
 | 
						|
    expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index
 | 
						|
    expected = DataFrame({"c": [df["c"].iloc[0]]}, index=expected_index).astype(
 | 
						|
        result_dtype
 | 
						|
    )
 | 
						|
    if method == "apply":
 | 
						|
        expected.columns.names = [0]
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "keys, agg_index",
 | 
						|
    [
 | 
						|
        (["a"], Index([1], name="a")),
 | 
						|
        (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])),
 | 
						|
    ],
 | 
						|
)
 | 
						|
@pytest.mark.parametrize("input", [True, 1, 1.0])
 | 
						|
@pytest.mark.parametrize("dtype", [bool, int, float])
 | 
						|
@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"])
 | 
						|
def test_callable_result_dtype_series(keys, agg_index, input, dtype, method):
 | 
						|
    # GH 21240
 | 
						|
    df = DataFrame({"a": [1], "b": [2], "c": [input]})
 | 
						|
    op = getattr(df.groupby(keys)["c"], method)
 | 
						|
    result = op(lambda x: x.astype(dtype).iloc[0])
 | 
						|
    expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index
 | 
						|
    expected = Series([df["c"].iloc[0]], index=expected_index, name="c").astype(dtype)
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_order_aggregate_multiple_funcs():
 | 
						|
    # GH 25692
 | 
						|
    df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]})
 | 
						|
 | 
						|
    res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
 | 
						|
    result = res.columns.levels[1]
 | 
						|
 | 
						|
    expected = Index(["sum", "max", "mean", "ohlc", "min"])
 | 
						|
 | 
						|
    tm.assert_index_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("dtype", [np.int64, np.uint64])
 | 
						|
@pytest.mark.parametrize("how", ["first", "last", "min", "max", "mean", "median"])
 | 
						|
def test_uint64_type_handling(dtype, how):
 | 
						|
    # GH 26310
 | 
						|
    df = DataFrame({"x": 6903052872240755750, "y": [1, 2]})
 | 
						|
    expected = df.groupby("y").agg({"x": how})
 | 
						|
    df.x = df.x.astype(dtype)
 | 
						|
    result = df.groupby("y").agg({"x": how})
 | 
						|
    if how not in ("mean", "median"):
 | 
						|
        # mean and median always result in floats
 | 
						|
        result.x = result.x.astype(np.int64)
 | 
						|
    tm.assert_frame_equal(result, expected, check_exact=True)
 | 
						|
 | 
						|
 | 
						|
def test_func_duplicates_raises():
 | 
						|
    # GH28426
 | 
						|
    msg = "Function names"
 | 
						|
    df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
 | 
						|
    with pytest.raises(SpecificationError, match=msg):
 | 
						|
        df.groupby("A").agg(["min", "min"])
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "index",
 | 
						|
    [
 | 
						|
        pd.CategoricalIndex(list("abc")),
 | 
						|
        pd.interval_range(0, 3),
 | 
						|
        pd.period_range("2020", periods=3, freq="D"),
 | 
						|
        MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_agg_index_has_complex_internals(index):
 | 
						|
    # GH 31223
 | 
						|
    df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
 | 
						|
    result = df.groupby("group").agg({"value": Series.nunique})
 | 
						|
    expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group")
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_split_block():
 | 
						|
    # https://github.com/pandas-dev/pandas/issues/31522
 | 
						|
    df = DataFrame(
 | 
						|
        {
 | 
						|
            "key1": ["a", "a", "b", "b", "a"],
 | 
						|
            "key2": ["one", "two", "one", "two", "one"],
 | 
						|
            "key3": ["three", "three", "three", "six", "six"],
 | 
						|
        }
 | 
						|
    )
 | 
						|
    result = df.groupby("key1").min()
 | 
						|
    expected = DataFrame(
 | 
						|
        {"key2": ["one", "one"], "key3": ["six", "six"]},
 | 
						|
        index=Index(["a", "b"], name="key1"),
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_split_object_part_datetime():
 | 
						|
    # https://github.com/pandas-dev/pandas/pull/31616
 | 
						|
    df = DataFrame(
 | 
						|
        {
 | 
						|
            "A": pd.date_range("2000", periods=4),
 | 
						|
            "B": ["a", "b", "c", "d"],
 | 
						|
            "C": [1, 2, 3, 4],
 | 
						|
            "D": ["b", "c", "d", "e"],
 | 
						|
            "E": pd.date_range("2000", periods=4),
 | 
						|
            "F": [1, 2, 3, 4],
 | 
						|
        }
 | 
						|
    ).astype(object)
 | 
						|
    result = df.groupby([0, 0, 0, 0]).min()
 | 
						|
    expected = DataFrame(
 | 
						|
        {
 | 
						|
            "A": [pd.Timestamp("2000")],
 | 
						|
            "B": ["a"],
 | 
						|
            "C": [1],
 | 
						|
            "D": ["b"],
 | 
						|
            "E": [pd.Timestamp("2000")],
 | 
						|
            "F": [1],
 | 
						|
        }
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
class TestNamedAggregationSeries:
 | 
						|
    def test_series_named_agg(self):
 | 
						|
        df = Series([1, 2, 3, 4])
 | 
						|
        gr = df.groupby([0, 0, 1, 1])
 | 
						|
        result = gr.agg(a="sum", b="min")
 | 
						|
        expected = DataFrame(
 | 
						|
            {"a": [3, 7], "b": [1, 3]}, columns=["a", "b"], index=[0, 1]
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        result = gr.agg(b="min", a="sum")
 | 
						|
        expected = expected[["b", "a"]]
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_no_args_raises(self):
 | 
						|
        gr = Series([1, 2]).groupby([0, 1])
 | 
						|
        with pytest.raises(TypeError, match="Must provide"):
 | 
						|
            gr.agg()
 | 
						|
 | 
						|
        # but we do allow this
 | 
						|
        result = gr.agg([])
 | 
						|
        expected = DataFrame()
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_series_named_agg_duplicates_no_raises(self):
 | 
						|
        # GH28426
 | 
						|
        gr = Series([1, 2, 3]).groupby([0, 0, 1])
 | 
						|
        grouped = gr.agg(a="sum", b="sum")
 | 
						|
        expected = DataFrame({"a": [3, 3], "b": [3, 3]})
 | 
						|
        tm.assert_frame_equal(expected, grouped)
 | 
						|
 | 
						|
    def test_mangled(self):
 | 
						|
        gr = Series([1, 2, 3]).groupby([0, 0, 1])
 | 
						|
        result = gr.agg(a=lambda x: 0, b=lambda x: 1)
 | 
						|
        expected = DataFrame({"a": [0, 0], "b": [1, 1]})
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "inp",
 | 
						|
        [
 | 
						|
            pd.NamedAgg(column="anything", aggfunc="min"),
 | 
						|
            ("anything", "min"),
 | 
						|
            ["anything", "min"],
 | 
						|
        ],
 | 
						|
    )
 | 
						|
    def test_named_agg_nametuple(self, inp):
 | 
						|
        # GH34422
 | 
						|
        s = Series([1, 1, 2, 2, 3, 3, 4, 5])
 | 
						|
        msg = f"func is expected but received {type(inp).__name__}"
 | 
						|
        with pytest.raises(TypeError, match=msg):
 | 
						|
            s.groupby(s.values).agg(a=inp)
 | 
						|
 | 
						|
 | 
						|
class TestNamedAggregationDataFrame:
 | 
						|
    def test_agg_relabel(self):
 | 
						|
        df = DataFrame(
 | 
						|
            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
 | 
						|
        )
 | 
						|
        result = df.groupby("group").agg(a_max=("A", "max"), b_max=("B", "max"))
 | 
						|
        expected = DataFrame(
 | 
						|
            {"a_max": [1, 3], "b_max": [6, 8]},
 | 
						|
            index=Index(["a", "b"], name="group"),
 | 
						|
            columns=["a_max", "b_max"],
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        # order invariance
 | 
						|
        p98 = functools.partial(np.percentile, q=98)
 | 
						|
        result = df.groupby("group").agg(
 | 
						|
            b_min=("B", "min"),
 | 
						|
            a_min=("A", min),
 | 
						|
            a_mean=("A", np.mean),
 | 
						|
            a_max=("A", "max"),
 | 
						|
            b_max=("B", "max"),
 | 
						|
            a_98=("A", p98),
 | 
						|
        )
 | 
						|
        expected = DataFrame(
 | 
						|
            {
 | 
						|
                "b_min": [5, 7],
 | 
						|
                "a_min": [0, 2],
 | 
						|
                "a_mean": [0.5, 2.5],
 | 
						|
                "a_max": [1, 3],
 | 
						|
                "b_max": [6, 8],
 | 
						|
                "a_98": [0.98, 2.98],
 | 
						|
            },
 | 
						|
            index=Index(["a", "b"], name="group"),
 | 
						|
            columns=["b_min", "a_min", "a_mean", "a_max", "b_max", "a_98"],
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_agg_relabel_non_identifier(self):
 | 
						|
        df = DataFrame(
 | 
						|
            {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
 | 
						|
        )
 | 
						|
 | 
						|
        result = df.groupby("group").agg(**{"my col": ("A", "max")})
 | 
						|
        expected = DataFrame({"my col": [1, 3]}, index=Index(["a", "b"], name="group"))
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_duplicate_no_raises(self):
 | 
						|
        # GH 28426, if use same input function on same column,
 | 
						|
        # no error should raise
 | 
						|
        df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
 | 
						|
 | 
						|
        grouped = df.groupby("A").agg(a=("B", "min"), b=("B", "min"))
 | 
						|
        expected = DataFrame({"a": [1, 3], "b": [1, 3]}, index=Index([0, 1], name="A"))
 | 
						|
        tm.assert_frame_equal(grouped, expected)
 | 
						|
 | 
						|
        quant50 = functools.partial(np.percentile, q=50)
 | 
						|
        quant70 = functools.partial(np.percentile, q=70)
 | 
						|
        quant50.__name__ = "quant50"
 | 
						|
        quant70.__name__ = "quant70"
 | 
						|
 | 
						|
        test = DataFrame({"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]})
 | 
						|
 | 
						|
        grouped = test.groupby("col1").agg(
 | 
						|
            quantile_50=("col2", quant50), quantile_70=("col2", quant70)
 | 
						|
        )
 | 
						|
        expected = DataFrame(
 | 
						|
            {"quantile_50": [1.5, 4.0], "quantile_70": [1.7, 4.4]},
 | 
						|
            index=Index(["a", "b"], name="col1"),
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(grouped, expected)
 | 
						|
 | 
						|
    def test_agg_relabel_with_level(self):
 | 
						|
        df = DataFrame(
 | 
						|
            {"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]},
 | 
						|
            index=MultiIndex.from_product([["A", "B"], ["a", "b"]]),
 | 
						|
        )
 | 
						|
        result = df.groupby(level=0).agg(
 | 
						|
            aa=("A", "max"), bb=("A", "min"), cc=("B", "mean")
 | 
						|
        )
 | 
						|
        expected = DataFrame(
 | 
						|
            {"aa": [0, 1], "bb": [0, 1], "cc": [1.5, 3.5]}, index=["A", "B"]
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_agg_relabel_other_raises(self):
 | 
						|
        df = DataFrame({"A": [0, 0, 1], "B": [1, 2, 3]})
 | 
						|
        grouped = df.groupby("A")
 | 
						|
        match = "Must provide"
 | 
						|
        with pytest.raises(TypeError, match=match):
 | 
						|
            grouped.agg(foo=1)
 | 
						|
 | 
						|
        with pytest.raises(TypeError, match=match):
 | 
						|
            grouped.agg()
 | 
						|
 | 
						|
        with pytest.raises(TypeError, match=match):
 | 
						|
            grouped.agg(a=("B", "max"), b=(1, 2, 3))
 | 
						|
 | 
						|
    def test_missing_raises(self):
 | 
						|
        df = DataFrame({"A": [0, 1], "B": [1, 2]})
 | 
						|
        match = re.escape("Column(s) ['C'] do not exist")
 | 
						|
        with pytest.raises(KeyError, match=match):
 | 
						|
            df.groupby("A").agg(c=("C", "sum"))
 | 
						|
 | 
						|
    def test_agg_namedtuple(self):
 | 
						|
        df = DataFrame({"A": [0, 1], "B": [1, 2]})
 | 
						|
        result = df.groupby("A").agg(
 | 
						|
            b=pd.NamedAgg("B", "sum"), c=pd.NamedAgg(column="B", aggfunc="count")
 | 
						|
        )
 | 
						|
        expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count"))
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_mangled(self):
 | 
						|
        df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]})
 | 
						|
        result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1))
 | 
						|
        expected = DataFrame({"b": [0, 0], "c": [1, 1]}, index=Index([0, 1], name="A"))
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3",
 | 
						|
    [
 | 
						|
        (
 | 
						|
            (("y", "A"), "max"),
 | 
						|
            (("y", "A"), np.min),
 | 
						|
            (("y", "B"), "mean"),
 | 
						|
            [1, 3],
 | 
						|
            [0, 2],
 | 
						|
            [5.5, 7.5],
 | 
						|
        ),
 | 
						|
        (
 | 
						|
            (("y", "A"), lambda x: max(x)),
 | 
						|
            (("y", "A"), lambda x: 1),
 | 
						|
            (("y", "B"), "mean"),
 | 
						|
            [1, 3],
 | 
						|
            [1, 1],
 | 
						|
            [5.5, 7.5],
 | 
						|
        ),
 | 
						|
        (
 | 
						|
            pd.NamedAgg(("y", "A"), "max"),
 | 
						|
            pd.NamedAgg(("y", "B"), np.mean),
 | 
						|
            pd.NamedAgg(("y", "A"), lambda x: 1),
 | 
						|
            [1, 3],
 | 
						|
            [5.5, 7.5],
 | 
						|
            [1, 1],
 | 
						|
        ),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_agg_relabel_multiindex_column(
 | 
						|
    agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3
 | 
						|
):
 | 
						|
    # GH 29422, add tests for multiindex column cases
 | 
						|
    df = DataFrame(
 | 
						|
        {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
 | 
						|
    )
 | 
						|
    df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
 | 
						|
    idx = Index(["a", "b"], name=("x", "group"))
 | 
						|
 | 
						|
    result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max"))
 | 
						|
    expected = DataFrame({"a_max": [1, 3]}, index=idx)
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    result = df.groupby(("x", "group")).agg(
 | 
						|
        col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
 | 
						|
    )
 | 
						|
    expected = DataFrame(
 | 
						|
        {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_agg_relabel_multiindex_raises_not_exist():
 | 
						|
    # GH 29422, add test for raises scenario when aggregate column does not exist
 | 
						|
    df = DataFrame(
 | 
						|
        {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
 | 
						|
    )
 | 
						|
    df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
 | 
						|
 | 
						|
    with pytest.raises(KeyError, match="do not exist"):
 | 
						|
        df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
 | 
						|
 | 
						|
 | 
						|
def test_agg_relabel_multiindex_duplicates():
 | 
						|
    # GH29422, add test for raises scenario when getting duplicates
 | 
						|
    # GH28426, after this change, duplicates should also work if the relabelling is
 | 
						|
    # different
 | 
						|
    df = DataFrame(
 | 
						|
        {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
 | 
						|
    )
 | 
						|
    df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
 | 
						|
 | 
						|
    result = df.groupby(("x", "group")).agg(
 | 
						|
        a=(("y", "A"), "min"), b=(("y", "A"), "min")
 | 
						|
    )
 | 
						|
    idx = Index(["a", "b"], name=("x", "group"))
 | 
						|
    expected = DataFrame({"a": [0, 2], "b": [0, 2]}, index=idx)
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("kwargs", [{"c": ["min"]}, {"b": [], "c": ["min"]}])
 | 
						|
def test_groupby_aggregate_empty_key(kwargs):
 | 
						|
    # GH: 32580
 | 
						|
    df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]})
 | 
						|
    result = df.groupby("a").agg(kwargs)
 | 
						|
    expected = DataFrame(
 | 
						|
        [1, 4],
 | 
						|
        index=Index([1, 2], dtype="int64", name="a"),
 | 
						|
        columns=MultiIndex.from_tuples([["c", "min"]]),
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_aggregate_empty_key_empty_return():
 | 
						|
    # GH: 32580 Check if everything works, when return is empty
 | 
						|
    df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]})
 | 
						|
    result = df.groupby("a").agg({"b": []})
 | 
						|
    expected = DataFrame(columns=MultiIndex(levels=[["b"], []], codes=[[], []]))
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_aggregate_empty_with_multiindex_frame():
 | 
						|
    # GH 39178
 | 
						|
    df = DataFrame(columns=["a", "b", "c"])
 | 
						|
    result = df.groupby(["a", "b"]).agg(d=("c", list))
 | 
						|
    expected = DataFrame(
 | 
						|
        columns=["d"], index=MultiIndex([[], []], [[], []], names=["a", "b"])
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_grouby_agg_loses_results_with_as_index_false_relabel():
 | 
						|
    # GH 32240: When the aggregate function relabels column names and
 | 
						|
    # as_index=False is specified, the results are dropped.
 | 
						|
 | 
						|
    df = DataFrame(
 | 
						|
        {"key": ["x", "y", "z", "x", "y", "z"], "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75]}
 | 
						|
    )
 | 
						|
 | 
						|
    grouped = df.groupby("key", as_index=False)
 | 
						|
    result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min"))
 | 
						|
    expected = DataFrame({"key": ["x", "y", "z"], "min_val": [1.0, 0.8, 0.75]})
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
 | 
						|
    # GH 32240: When the aggregate function relabels column names and
 | 
						|
    # as_index=False is specified, the results are dropped. Check if
 | 
						|
    # multiindex is returned in the right order
 | 
						|
 | 
						|
    df = DataFrame(
 | 
						|
        {
 | 
						|
            "key": ["x", "y", "x", "y", "x", "x"],
 | 
						|
            "key1": ["a", "b", "c", "b", "a", "c"],
 | 
						|
            "val": [1.0, 0.8, 2.0, 3.0, 3.6, 0.75],
 | 
						|
        }
 | 
						|
    )
 | 
						|
 | 
						|
    grouped = df.groupby(["key", "key1"], as_index=False)
 | 
						|
    result = grouped.agg(min_val=pd.NamedAgg(column="val", aggfunc="min"))
 | 
						|
    expected = DataFrame(
 | 
						|
        {"key": ["x", "x", "y"], "key1": ["a", "c", "b"], "min_val": [1.0, 0.75, 0.8]}
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)]
 | 
						|
)
 | 
						|
def test_multiindex_custom_func(func):
 | 
						|
    # GH 31777
 | 
						|
    data = [[1, 4, 2], [5, 7, 1]]
 | 
						|
    df = DataFrame(
 | 
						|
        data,
 | 
						|
        columns=MultiIndex.from_arrays(
 | 
						|
            [[1, 1, 2], [3, 4, 3]], names=["Sisko", "Janeway"]
 | 
						|
        ),
 | 
						|
    )
 | 
						|
    result = df.groupby(np.array([0, 1])).agg(func)
 | 
						|
    expected_dict = {
 | 
						|
        (1, 3): {0: 1.0, 1: 5.0},
 | 
						|
        (1, 4): {0: 4.0, 1: 7.0},
 | 
						|
        (2, 3): {0: 2.0, 1: 1.0},
 | 
						|
    }
 | 
						|
    expected = DataFrame(expected_dict)
 | 
						|
    expected.columns = df.columns
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def myfunc(s):
 | 
						|
    return np.percentile(s, q=0.90)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("func", [lambda s: np.percentile(s, q=0.90), myfunc])
 | 
						|
def test_lambda_named_agg(func):
 | 
						|
    # see gh-28467
 | 
						|
    animals = DataFrame(
 | 
						|
        {
 | 
						|
            "kind": ["cat", "dog", "cat", "dog"],
 | 
						|
            "height": [9.1, 6.0, 9.5, 34.0],
 | 
						|
            "weight": [7.9, 7.5, 9.9, 198.0],
 | 
						|
        }
 | 
						|
    )
 | 
						|
 | 
						|
    result = animals.groupby("kind").agg(
 | 
						|
        mean_height=("height", "mean"), perc90=("height", func)
 | 
						|
    )
 | 
						|
    expected = DataFrame(
 | 
						|
        [[9.3, 9.1036], [20.0, 6.252]],
 | 
						|
        columns=["mean_height", "perc90"],
 | 
						|
        index=Index(["cat", "dog"], name="kind"),
 | 
						|
    )
 | 
						|
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_aggregate_mixed_types():
 | 
						|
    # GH 16916
 | 
						|
    df = DataFrame(
 | 
						|
        data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc")
 | 
						|
    )
 | 
						|
    df["grouping"] = ["group 1", "group 1", 2]
 | 
						|
    result = df.groupby("grouping").aggregate(lambda x: x.tolist())
 | 
						|
    expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]]
 | 
						|
    expected = DataFrame(
 | 
						|
        expected_data,
 | 
						|
        index=Index([2, "group 1"], dtype="object", name="grouping"),
 | 
						|
        columns=Index(["X", "Y", "Z"], dtype="object"),
 | 
						|
    )
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.xfail(reason="Not implemented;see GH 31256")
 | 
						|
def test_aggregate_udf_na_extension_type():
 | 
						|
    # https://github.com/pandas-dev/pandas/pull/31359
 | 
						|
    # This is currently failing to cast back to Int64Dtype.
 | 
						|
    # The presence of the NA causes two problems
 | 
						|
    # 1. NA is not an instance of Int64Dtype.type (numpy.int64)
 | 
						|
    # 2. The presence of an NA forces object type, so the non-NA values is
 | 
						|
    #    a Python int rather than a NumPy int64. Python ints aren't
 | 
						|
    #    instances of numpy.int64.
 | 
						|
    def aggfunc(x):
 | 
						|
        if all(x > 2):
 | 
						|
            return 1
 | 
						|
        else:
 | 
						|
            return pd.NA
 | 
						|
 | 
						|
    df = DataFrame({"A": pd.array([1, 2, 3])})
 | 
						|
    result = df.groupby([1, 1, 2]).agg(aggfunc)
 | 
						|
    expected = DataFrame({"A": pd.array([1, pd.NA], dtype="Int64")}, index=[1, 2])
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
class TestLambdaMangling:
 | 
						|
    def test_basic(self):
 | 
						|
        df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
 | 
						|
        result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]})
 | 
						|
 | 
						|
        expected = DataFrame(
 | 
						|
            {("B", "<lambda_0>"): [0, 0], ("B", "<lambda_1>"): [1, 1]},
 | 
						|
            index=Index([0, 1], name="A"),
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_mangle_series_groupby(self):
 | 
						|
        gr = Series([1, 2, 3, 4]).groupby([0, 0, 1, 1])
 | 
						|
        result = gr.agg([lambda x: 0, lambda x: 1])
 | 
						|
        expected = DataFrame({"<lambda_0>": [0, 0], "<lambda_1>": [1, 1]})
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.")
 | 
						|
    @pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning")
 | 
						|
    def test_with_kwargs(self):
 | 
						|
        f1 = lambda x, y, b=1: x.sum() + y + b
 | 
						|
        f2 = lambda x, y, b=2: x.sum() + y * b
 | 
						|
        result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0)
 | 
						|
        expected = DataFrame({"<lambda_0>": [4], "<lambda_1>": [6]})
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        result = Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
 | 
						|
        expected = DataFrame({"<lambda_0>": [13], "<lambda_1>": [30]})
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_agg_with_one_lambda(self):
 | 
						|
        # GH 25719, write tests for DataFrameGroupby.agg with only one lambda
 | 
						|
        df = DataFrame(
 | 
						|
            {
 | 
						|
                "kind": ["cat", "dog", "cat", "dog"],
 | 
						|
                "height": [9.1, 6.0, 9.5, 34.0],
 | 
						|
                "weight": [7.9, 7.5, 9.9, 198.0],
 | 
						|
            }
 | 
						|
        )
 | 
						|
 | 
						|
        columns = ["height_sqr_min", "height_max", "weight_max"]
 | 
						|
        expected = DataFrame(
 | 
						|
            {
 | 
						|
                "height_sqr_min": [82.81, 36.00],
 | 
						|
                "height_max": [9.5, 34.0],
 | 
						|
                "weight_max": [9.9, 198.0],
 | 
						|
            },
 | 
						|
            index=Index(["cat", "dog"], name="kind"),
 | 
						|
            columns=columns,
 | 
						|
        )
 | 
						|
 | 
						|
        # check pd.NameAgg case
 | 
						|
        result1 = df.groupby(by="kind").agg(
 | 
						|
            height_sqr_min=pd.NamedAgg(
 | 
						|
                column="height", aggfunc=lambda x: np.min(x**2)
 | 
						|
            ),
 | 
						|
            height_max=pd.NamedAgg(column="height", aggfunc="max"),
 | 
						|
            weight_max=pd.NamedAgg(column="weight", aggfunc="max"),
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result1, expected)
 | 
						|
 | 
						|
        # check agg(key=(col, aggfunc)) case
 | 
						|
        result2 = df.groupby(by="kind").agg(
 | 
						|
            height_sqr_min=("height", lambda x: np.min(x**2)),
 | 
						|
            height_max=("height", "max"),
 | 
						|
            weight_max=("weight", "max"),
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result2, expected)
 | 
						|
 | 
						|
    def test_agg_multiple_lambda(self):
 | 
						|
        # GH25719, test for DataFrameGroupby.agg with multiple lambdas
 | 
						|
        # with mixed aggfunc
 | 
						|
        df = DataFrame(
 | 
						|
            {
 | 
						|
                "kind": ["cat", "dog", "cat", "dog"],
 | 
						|
                "height": [9.1, 6.0, 9.5, 34.0],
 | 
						|
                "weight": [7.9, 7.5, 9.9, 198.0],
 | 
						|
            }
 | 
						|
        )
 | 
						|
        columns = [
 | 
						|
            "height_sqr_min",
 | 
						|
            "height_max",
 | 
						|
            "weight_max",
 | 
						|
            "height_max_2",
 | 
						|
            "weight_min",
 | 
						|
        ]
 | 
						|
        expected = DataFrame(
 | 
						|
            {
 | 
						|
                "height_sqr_min": [82.81, 36.00],
 | 
						|
                "height_max": [9.5, 34.0],
 | 
						|
                "weight_max": [9.9, 198.0],
 | 
						|
                "height_max_2": [9.5, 34.0],
 | 
						|
                "weight_min": [7.9, 7.5],
 | 
						|
            },
 | 
						|
            index=Index(["cat", "dog"], name="kind"),
 | 
						|
            columns=columns,
 | 
						|
        )
 | 
						|
 | 
						|
        # check agg(key=(col, aggfunc)) case
 | 
						|
        result1 = df.groupby(by="kind").agg(
 | 
						|
            height_sqr_min=("height", lambda x: np.min(x**2)),
 | 
						|
            height_max=("height", "max"),
 | 
						|
            weight_max=("weight", "max"),
 | 
						|
            height_max_2=("height", lambda x: np.max(x)),
 | 
						|
            weight_min=("weight", lambda x: np.min(x)),
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result1, expected)
 | 
						|
 | 
						|
        # check pd.NamedAgg case
 | 
						|
        result2 = df.groupby(by="kind").agg(
 | 
						|
            height_sqr_min=pd.NamedAgg(
 | 
						|
                column="height", aggfunc=lambda x: np.min(x**2)
 | 
						|
            ),
 | 
						|
            height_max=pd.NamedAgg(column="height", aggfunc="max"),
 | 
						|
            weight_max=pd.NamedAgg(column="weight", aggfunc="max"),
 | 
						|
            height_max_2=pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)),
 | 
						|
            weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)),
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result2, expected)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_get_by_index():
 | 
						|
    # GH 33439
 | 
						|
    df = DataFrame({"A": ["S", "W", "W"], "B": [1.0, 1.0, 2.0]})
 | 
						|
    res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])})
 | 
						|
    expected = DataFrame({"A": ["S", "W"], "B": [1.0, 2.0]}).set_index("A")
 | 
						|
    tm.assert_frame_equal(res, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "grp_col_dict, exp_data",
 | 
						|
    [
 | 
						|
        ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}),
 | 
						|
        ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}),
 | 
						|
        ({"nr": "min"}, {"nr": [1, 5]}),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
 | 
						|
    # test single aggregations on ordered categorical cols GHGH27800
 | 
						|
 | 
						|
    # create the result dataframe
 | 
						|
    input_df = DataFrame(
 | 
						|
        {
 | 
						|
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
 | 
						|
            "cat_ord": list("aabbccdd"),
 | 
						|
            "cat": list("aaaabbbb"),
 | 
						|
        }
 | 
						|
    )
 | 
						|
 | 
						|
    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
 | 
						|
    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
 | 
						|
    result_df = input_df.groupby("cat").agg(grp_col_dict)
 | 
						|
 | 
						|
    # create expected dataframe
 | 
						|
    cat_index = pd.CategoricalIndex(
 | 
						|
        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
 | 
						|
    )
 | 
						|
 | 
						|
    expected_df = DataFrame(data=exp_data, index=cat_index)
 | 
						|
 | 
						|
    if "cat_ord" in expected_df:
 | 
						|
        # ordered categorical columns should be preserved
 | 
						|
        dtype = input_df["cat_ord"].dtype
 | 
						|
        expected_df["cat_ord"] = expected_df["cat_ord"].astype(dtype)
 | 
						|
 | 
						|
    tm.assert_frame_equal(result_df, expected_df)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "grp_col_dict, exp_data",
 | 
						|
    [
 | 
						|
        ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]),
 | 
						|
        ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]),
 | 
						|
        ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
 | 
						|
    # test combined aggregations on ordered categorical cols GH27800
 | 
						|
 | 
						|
    # create the result dataframe
 | 
						|
    input_df = DataFrame(
 | 
						|
        {
 | 
						|
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
 | 
						|
            "cat_ord": list("aabbccdd"),
 | 
						|
            "cat": list("aaaabbbb"),
 | 
						|
        }
 | 
						|
    )
 | 
						|
 | 
						|
    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
 | 
						|
    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
 | 
						|
    result_df = input_df.groupby("cat").agg(grp_col_dict)
 | 
						|
 | 
						|
    # create expected dataframe
 | 
						|
    cat_index = pd.CategoricalIndex(
 | 
						|
        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
 | 
						|
    )
 | 
						|
 | 
						|
    # unpack the grp_col_dict to create the multi-index tuple
 | 
						|
    # this tuple will be used to create the expected dataframe index
 | 
						|
    multi_index_list = []
 | 
						|
    for k, v in grp_col_dict.items():
 | 
						|
        if isinstance(v, list):
 | 
						|
            for value in v:
 | 
						|
                multi_index_list.append([k, value])
 | 
						|
        else:
 | 
						|
            multi_index_list.append([k, v])
 | 
						|
    multi_index = MultiIndex.from_tuples(tuple(multi_index_list))
 | 
						|
 | 
						|
    expected_df = DataFrame(data=exp_data, columns=multi_index, index=cat_index)
 | 
						|
    for col in expected_df.columns:
 | 
						|
        if isinstance(col, tuple) and "cat_ord" in col:
 | 
						|
            # ordered categorical should be preserved
 | 
						|
            expected_df[col] = expected_df[col].astype(input_df["cat_ord"].dtype)
 | 
						|
 | 
						|
    tm.assert_frame_equal(result_df, expected_df)
 | 
						|
 | 
						|
 | 
						|
def test_nonagg_agg():
 | 
						|
    # GH 35490 - Single/Multiple agg of non-agg function give same results
 | 
						|
    # TODO: agg should raise for functions that don't aggregate
 | 
						|
    df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]})
 | 
						|
    g = df.groupby("a")
 | 
						|
 | 
						|
    result = g.agg(["cumsum"])
 | 
						|
    result.columns = result.columns.droplevel(-1)
 | 
						|
    expected = g.agg("cumsum")
 | 
						|
 | 
						|
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_aggregate_datetime_objects():
 | 
						|
    # https://github.com/pandas-dev/pandas/issues/36003
 | 
						|
    # ensure we don't raise an error but keep object dtype for out-of-bounds
 | 
						|
    # datetimes
 | 
						|
    df = DataFrame(
 | 
						|
        {
 | 
						|
            "A": ["X", "Y"],
 | 
						|
            "B": [
 | 
						|
                datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
 | 
						|
                datetime.datetime(3005, 1, 1, 10, 30, 23, 540000),
 | 
						|
            ],
 | 
						|
        }
 | 
						|
    )
 | 
						|
    result = df.groupby("A").B.max()
 | 
						|
    expected = df.set_index("A")["B"]
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_index_object_dtype():
 | 
						|
    # GH 40014
 | 
						|
    df = DataFrame({"c0": ["x", "x", "x"], "c1": ["x", "x", "y"], "p": [0, 1, 2]})
 | 
						|
    df.index = df.index.astype("O")
 | 
						|
    grouped = df.groupby(["c0", "c1"])
 | 
						|
    res = grouped.p.agg(lambda x: all(x > 0))
 | 
						|
    # Check that providing a user-defined function in agg()
 | 
						|
    # produces the correct index shape when using an object-typed index.
 | 
						|
    expected_index = MultiIndex.from_tuples(
 | 
						|
        [("x", "x"), ("x", "y")], names=("c0", "c1")
 | 
						|
    )
 | 
						|
    expected = Series([False, True], index=expected_index, name="p")
 | 
						|
    tm.assert_series_equal(res, expected)
 | 
						|
 | 
						|
 | 
						|
def test_timeseries_groupby_agg():
 | 
						|
    # GH#43290
 | 
						|
 | 
						|
    def func(ser):
 | 
						|
        if ser.isna().all():
 | 
						|
            return None
 | 
						|
        return np.sum(ser)
 | 
						|
 | 
						|
    df = DataFrame([1.0], index=[pd.Timestamp("2018-01-16 00:00:00+00:00")])
 | 
						|
    res = df.groupby(lambda x: 1).agg(func)
 | 
						|
 | 
						|
    expected = DataFrame([[1.0]], index=[1])
 | 
						|
    tm.assert_frame_equal(res, expected)
 | 
						|
 | 
						|
 | 
						|
def test_groupby_aggregate_directory(reduction_func):
 | 
						|
    # GH#32793
 | 
						|
    if reduction_func in ["corrwith", "nth"]:
 | 
						|
        return None
 | 
						|
 | 
						|
    obj = DataFrame([[0, 1], [0, np.nan]])
 | 
						|
 | 
						|
    result_reduced_series = obj.groupby(0).agg(reduction_func)
 | 
						|
    result_reduced_frame = obj.groupby(0).agg({1: reduction_func})
 | 
						|
 | 
						|
    if reduction_func in ["size", "ngroup"]:
 | 
						|
        # names are different: None / 1
 | 
						|
        tm.assert_series_equal(
 | 
						|
            result_reduced_series, result_reduced_frame[1], check_names=False
 | 
						|
        )
 | 
						|
    else:
 | 
						|
        tm.assert_frame_equal(result_reduced_series, result_reduced_frame)
 | 
						|
        tm.assert_series_equal(
 | 
						|
            result_reduced_series.dtypes, result_reduced_frame.dtypes
 | 
						|
        )
 | 
						|
 | 
						|
 | 
						|
def test_group_mean_timedelta_nat():
 | 
						|
    # GH43132
 | 
						|
    data = Series(["1 day", "3 days", "NaT"], dtype="timedelta64[ns]")
 | 
						|
    expected = Series(["2 days"], dtype="timedelta64[ns]")
 | 
						|
 | 
						|
    result = data.groupby([0, 0, 0]).mean()
 | 
						|
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "input_data, expected_output",
 | 
						|
    [
 | 
						|
        (  # no timezone
 | 
						|
            ["2021-01-01T00:00", "NaT", "2021-01-01T02:00"],
 | 
						|
            ["2021-01-01T01:00"],
 | 
						|
        ),
 | 
						|
        (  # timezone
 | 
						|
            ["2021-01-01T00:00-0100", "NaT", "2021-01-01T02:00-0100"],
 | 
						|
            ["2021-01-01T01:00-0100"],
 | 
						|
        ),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_group_mean_datetime64_nat(input_data, expected_output):
 | 
						|
    # GH43132
 | 
						|
    data = to_datetime(Series(input_data))
 | 
						|
    expected = to_datetime(Series(expected_output))
 | 
						|
 | 
						|
    result = data.groupby([0, 0, 0]).mean()
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "func, output", [("mean", [8 + 18j, 10 + 22j]), ("sum", [40 + 90j, 50 + 110j])]
 | 
						|
)
 | 
						|
def test_groupby_complex(func, output):
 | 
						|
    # GH#43701
 | 
						|
    data = Series(np.arange(20).reshape(10, 2).dot([1, 2j]))
 | 
						|
    result = data.groupby(data.index % 2).agg(func)
 | 
						|
    expected = Series(output)
 | 
						|
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("func", ["min", "max", "var"])
 | 
						|
def test_groupby_complex_raises(func):
 | 
						|
    # GH#43701
 | 
						|
    data = Series(np.arange(20).reshape(10, 2).dot([1, 2j]))
 | 
						|
    msg = "No matching signature found"
 | 
						|
    with pytest.raises(TypeError, match=msg):
 | 
						|
        data.groupby(data.index % 2).agg(func)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}]
 | 
						|
)
 | 
						|
def test_multi_axis_1_raises(func):
 | 
						|
    # GH#46995
 | 
						|
    df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]})
 | 
						|
    gb = df.groupby("a", axis=1)
 | 
						|
    with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"):
 | 
						|
        gb.agg(func)
 |