107 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			107 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import pytest
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
import pandas._testing as tm
 | 
						|
from pandas.tests.extension.base.base import BaseExtensionTests
 | 
						|
 | 
						|
 | 
						|
class BaseGroupbyTests(BaseExtensionTests):
 | 
						|
    """Groupby-specific tests."""
 | 
						|
 | 
						|
    def test_grouping_grouper(self, data_for_grouping):
 | 
						|
        df = pd.DataFrame(
 | 
						|
            {"A": ["B", "B", None, None, "A", "A", "B", "C"], "B": data_for_grouping}
 | 
						|
        )
 | 
						|
        gr1 = df.groupby("A").grouper.groupings[0]
 | 
						|
        gr2 = df.groupby("B").grouper.groupings[0]
 | 
						|
 | 
						|
        tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values)
 | 
						|
        tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("as_index", [True, False])
 | 
						|
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
 | 
						|
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
 | 
						|
        result = df.groupby("B", as_index=as_index).A.mean()
 | 
						|
        _, uniques = pd.factorize(data_for_grouping, sort=True)
 | 
						|
 | 
						|
        if as_index:
 | 
						|
            index = pd.Index._with_infer(uniques, name="B")
 | 
						|
            expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A")
 | 
						|
            self.assert_series_equal(result, expected)
 | 
						|
        else:
 | 
						|
            expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]})
 | 
						|
            self.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_groupby_agg_extension(self, data_for_grouping):
 | 
						|
        # GH#38980 groupby agg on extension type fails for non-numeric types
 | 
						|
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
 | 
						|
 | 
						|
        expected = df.iloc[[0, 2, 4, 7]]
 | 
						|
        expected = expected.set_index("A")
 | 
						|
 | 
						|
        result = df.groupby("A").agg({"B": "first"})
 | 
						|
        self.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        result = df.groupby("A").agg("first")
 | 
						|
        self.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        result = df.groupby("A").first()
 | 
						|
        self.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_groupby_extension_no_sort(self, data_for_grouping):
 | 
						|
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
 | 
						|
        result = df.groupby("B", sort=False).A.mean()
 | 
						|
        _, index = pd.factorize(data_for_grouping, sort=False)
 | 
						|
 | 
						|
        index = pd.Index._with_infer(index, name="B")
 | 
						|
        expected = pd.Series([1.0, 3.0, 4.0], index=index, name="A")
 | 
						|
        self.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    def test_groupby_extension_transform(self, data_for_grouping):
 | 
						|
        valid = data_for_grouping[~data_for_grouping.isna()]
 | 
						|
        df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid})
 | 
						|
 | 
						|
        result = df.groupby("B").A.transform(len)
 | 
						|
        expected = pd.Series([3, 3, 2, 2, 3, 1], name="A")
 | 
						|
 | 
						|
        self.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
 | 
						|
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
 | 
						|
        df.groupby("B").apply(groupby_apply_op)
 | 
						|
        df.groupby("B").A.apply(groupby_apply_op)
 | 
						|
        df.groupby("A").apply(groupby_apply_op)
 | 
						|
        df.groupby("A").B.apply(groupby_apply_op)
 | 
						|
 | 
						|
    def test_groupby_apply_identity(self, data_for_grouping):
 | 
						|
        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
 | 
						|
        result = df.groupby("A").B.apply(lambda x: x.array)
 | 
						|
        expected = pd.Series(
 | 
						|
            [
 | 
						|
                df.B.iloc[[0, 1, 6]].array,
 | 
						|
                df.B.iloc[[2, 3]].array,
 | 
						|
                df.B.iloc[[4, 5]].array,
 | 
						|
                df.B.iloc[[7]].array,
 | 
						|
            ],
 | 
						|
            index=pd.Index([1, 2, 3, 4], name="A"),
 | 
						|
            name="B",
 | 
						|
        )
 | 
						|
        self.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    def test_in_numeric_groupby(self, data_for_grouping):
 | 
						|
        df = pd.DataFrame(
 | 
						|
            {
 | 
						|
                "A": [1, 1, 2, 2, 3, 3, 1, 4],
 | 
						|
                "B": data_for_grouping,
 | 
						|
                "C": [1, 1, 1, 1, 1, 1, 1, 1],
 | 
						|
            }
 | 
						|
        )
 | 
						|
        result = df.groupby("A").sum().columns
 | 
						|
 | 
						|
        if data_for_grouping.dtype._is_numeric:
 | 
						|
            expected = pd.Index(["B", "C"])
 | 
						|
        else:
 | 
						|
            expected = pd.Index(["C"])
 | 
						|
 | 
						|
        tm.assert_index_equal(result, expected)
 |