针对pulse-transit的工具
This commit is contained in:
		
							
								
								
									
										827
									
								
								dist/client/pandas/tests/reshape/test_crosstab.py
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										827
									
								
								dist/client/pandas/tests/reshape/test_crosstab.py
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,827 @@
 | 
			
		||||
import numpy as np
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
from pandas.core.dtypes.common import is_categorical_dtype
 | 
			
		||||
 | 
			
		||||
import pandas as pd
 | 
			
		||||
from pandas import (
 | 
			
		||||
    CategoricalIndex,
 | 
			
		||||
    DataFrame,
 | 
			
		||||
    Index,
 | 
			
		||||
    MultiIndex,
 | 
			
		||||
    Series,
 | 
			
		||||
    crosstab,
 | 
			
		||||
)
 | 
			
		||||
import pandas._testing as tm
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestCrosstab:
 | 
			
		||||
    def setup_method(self, method):
        """Build the shared fixture frame and store it on ``self.df``.

        An 11-row frame with three string columns (A, B, C) and three
        columns drawn from ``np.random.randn`` (D, E, F) is created and
        then concatenated with itself under a fresh integer index.
        ``method`` is accepted but not used.
        """
        labels_a = ["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3
        labels_b = ["one", "one", "one", "two"] * 2 + ["two", "two", "one"]
        labels_c = [
            "dull", "dull", "shiny", "dull", "dull", "shiny",
            "shiny", "dull", "shiny", "shiny", "shiny",
        ]
        base = DataFrame(
            {
                "A": labels_a,
                "B": labels_b,
                "C": labels_c,
                # Random columns are drawn in D, E, F order.
                "D": np.random.randn(11),
                "E": np.random.randn(11),
                "F": np.random.randn(11),
            }
        )
        self.df = pd.concat([base, base], ignore_index=True)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_single(self):
 | 
			
		||||
        df = self.df
 | 
			
		||||
        result = crosstab(df["A"], df["C"])
 | 
			
		||||
        expected = df.groupby(["A", "C"]).size().unstack()
 | 
			
		||||
        tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64))
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_multiple(self):
 | 
			
		||||
        df = self.df
 | 
			
		||||
 | 
			
		||||
        result = crosstab(df["A"], [df["B"], df["C"]])
 | 
			
		||||
        expected = df.groupby(["A", "B", "C"]).size()
 | 
			
		||||
        expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64)
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
        result = crosstab([df["B"], df["C"]], df["A"])
 | 
			
		||||
        expected = df.groupby(["B", "C", "A"]).size()
 | 
			
		||||
        expected = expected.unstack("A").fillna(0).astype(np.int64)
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    @pytest.mark.parametrize("box", [np.array, list, tuple])
    def test_crosstab_ndarray(self, box):
        """Inputs wrapped by ``box`` tabulate like the equivalent Series.

        See GH 44076.
        """
        # Draw order (a, then b, then c) is kept as-is.
        a = box(np.random.randint(0, 5, size=100))
        b = box(np.random.randint(0, 3, size=100))
        c = box(np.random.randint(0, 10, size=100))
        frame = DataFrame({"a": a, "b": b, "c": c})

        # Explicit names, two column keys.
        expected = crosstab(frame["a"], [frame["b"], frame["c"]])
        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"))
        tm.assert_frame_equal(result, expected)

        # Explicit names, two row keys.
        expected = crosstab([frame["b"], frame["c"]], frame["a"])
        result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c"))
        tm.assert_frame_equal(result, expected)

        # Without names, the axes are expected to be called row_0 / col_0.
        expected = crosstab(frame["a"], frame["c"])
        expected.index.names = ["row_0"]
        expected.columns.names = ["col_0"]
        result = crosstab(a, c)
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_non_aligned(self):
 | 
			
		||||
        # GH 17005
 | 
			
		||||
        a = Series([0, 1, 1], index=["a", "b", "c"])
 | 
			
		||||
        b = Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"])
 | 
			
		||||
        c = np.array([3, 4, 3])
 | 
			
		||||
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [[1, 0], [1, 1]],
 | 
			
		||||
            index=Index([0, 1], name="row_0"),
 | 
			
		||||
            columns=Index([3, 4], name="col_0"),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        result = crosstab(a, b)
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
        result = crosstab(a, c)
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_margins(self):
 | 
			
		||||
        a = np.random.randint(0, 7, size=100)
 | 
			
		||||
        b = np.random.randint(0, 3, size=100)
 | 
			
		||||
        c = np.random.randint(0, 5, size=100)
 | 
			
		||||
 | 
			
		||||
        df = DataFrame({"a": a, "b": b, "c": c})
 | 
			
		||||
 | 
			
		||||
        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True)
 | 
			
		||||
 | 
			
		||||
        assert result.index.names == ("a",)
 | 
			
		||||
        assert result.columns.names == ["b", "c"]
 | 
			
		||||
 | 
			
		||||
        all_cols = result["All", ""]
 | 
			
		||||
        exp_cols = df.groupby(["a"]).size().astype("i8")
 | 
			
		||||
        # to keep index.name
 | 
			
		||||
        exp_margin = Series([len(df)], index=Index(["All"], name="a"))
 | 
			
		||||
        exp_cols = pd.concat([exp_cols, exp_margin])
 | 
			
		||||
        exp_cols.name = ("All", "")
 | 
			
		||||
 | 
			
		||||
        tm.assert_series_equal(all_cols, exp_cols)
 | 
			
		||||
 | 
			
		||||
        all_rows = result.loc["All"]
 | 
			
		||||
        exp_rows = df.groupby(["b", "c"]).size().astype("i8")
 | 
			
		||||
        exp_rows = pd.concat([exp_rows, Series([len(df)], index=[("All", "")])])
 | 
			
		||||
        exp_rows.name = "All"
 | 
			
		||||
 | 
			
		||||
        exp_rows = exp_rows.reindex(all_rows.index)
 | 
			
		||||
        exp_rows = exp_rows.fillna(0).astype(np.int64)
 | 
			
		||||
        tm.assert_series_equal(all_rows, exp_rows)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_margins_set_margin_name(self):
        """Check margins under ``margins_name="TOTAL"`` and reject bad names.

        See GH 15972.
        """
        a = np.random.randint(0, 7, size=100)
        b = np.random.randint(0, 3, size=100)
        c = np.random.randint(0, 5, size=100)
        frame = DataFrame({"a": a, "b": b, "c": c})

        result = crosstab(
            a,
            [b, c],
            rownames=["a"],
            colnames=("b", "c"),
            margins=True,
            margins_name="TOTAL",
        )
        assert result.index.names == ("a",)
        assert result.columns.names == ["b", "c"]

        # Column margin: sizes per "a" followed by the total row count.
        sizes_a = frame.groupby(["a"]).size().astype("i8")
        # The appended entry carries index name "a" so the name is kept.
        total_a = Series([len(frame)], index=Index(["TOTAL"], name="a"))
        exp_cols = pd.concat([sizes_a, total_a])
        exp_cols.name = ("TOTAL", "")
        all_cols = result["TOTAL", ""]
        tm.assert_series_equal(all_cols, exp_cols)

        # Row margin: sizes per (b, c) pair followed by the total row count.
        all_rows = result.loc["TOTAL"]
        sizes_bc = frame.groupby(["b", "c"]).size().astype("i8")
        total_bc = Series([len(frame)], index=[("TOTAL", "")])
        exp_rows = pd.concat([sizes_bc, total_bc])
        exp_rows.name = "TOTAL"
        exp_rows = exp_rows.reindex(all_rows.index).fillna(0).astype(np.int64)
        tm.assert_series_equal(all_rows, exp_rows)

        # Non-string margin names are expected to raise ValueError.
        msg = "margins_name argument must be a string"
        for bad_name in [666, None, ["a", "b"]]:
            with pytest.raises(ValueError, match=msg):
                crosstab(
                    a,
                    [b, c],
                    rownames=["a"],
                    colnames=("b", "c"),
                    margins=True,
                    margins_name=bad_name,
                )
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_pass_values(self):
 | 
			
		||||
        a = np.random.randint(0, 7, size=100)
 | 
			
		||||
        b = np.random.randint(0, 3, size=100)
 | 
			
		||||
        c = np.random.randint(0, 5, size=100)
 | 
			
		||||
        values = np.random.randn(100)
 | 
			
		||||
 | 
			
		||||
        table = crosstab(
 | 
			
		||||
            [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"]
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values})
 | 
			
		||||
 | 
			
		||||
        expected = df.pivot_table(
 | 
			
		||||
            "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(table, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_dropna(self):
 | 
			
		||||
        # GH 3820
 | 
			
		||||
        a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
 | 
			
		||||
        b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object)
 | 
			
		||||
        c = np.array(
 | 
			
		||||
            ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object
 | 
			
		||||
        )
 | 
			
		||||
        res = crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False)
 | 
			
		||||
        m = MultiIndex.from_tuples(
 | 
			
		||||
            [("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")],
 | 
			
		||||
            names=["b", "c"],
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_index_equal(res.columns, m)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_no_overlap(self):
 | 
			
		||||
        # GS 10291
 | 
			
		||||
 | 
			
		||||
        s1 = Series([1, 2, 3], index=[1, 2, 3])
 | 
			
		||||
        s2 = Series([4, 5, 6], index=[4, 5, 6])
 | 
			
		||||
 | 
			
		||||
        actual = crosstab(s1, s2)
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            index=Index([], dtype="int64", name="row_0"),
 | 
			
		||||
            columns=Index([], dtype="int64", name="col_0"),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_dropna(self):
 | 
			
		||||
        # GH 12577
 | 
			
		||||
        # pivot_table counts null into margin ('All')
 | 
			
		||||
        # when margins=true and dropna=true
 | 
			
		||||
 | 
			
		||||
        df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
 | 
			
		||||
        actual = crosstab(df.a, df.b, margins=True, dropna=True)
 | 
			
		||||
        expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]])
 | 
			
		||||
        expected.index = Index([1.0, 2.0, "All"], name="a")
 | 
			
		||||
        expected.columns = Index([3, 4, "All"], name="b")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_dropna2(self):
 | 
			
		||||
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
 | 
			
		||||
        )
 | 
			
		||||
        actual = crosstab(df.a, df.b, margins=True, dropna=True)
 | 
			
		||||
        expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
 | 
			
		||||
        expected.index = Index([1.0, 2.0, "All"], name="a")
 | 
			
		||||
        expected.columns = Index([3.0, 4.0, "All"], name="b")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_dropna3(self):
 | 
			
		||||
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]}
 | 
			
		||||
        )
 | 
			
		||||
        actual = crosstab(df.a, df.b, margins=True, dropna=True)
 | 
			
		||||
        expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]])
 | 
			
		||||
        expected.index = Index([1.0, 2.0, "All"], name="a")
 | 
			
		||||
        expected.columns = Index([3, 4, "All"], name="b")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_dropna4(self):
 | 
			
		||||
        # GH 12642
 | 
			
		||||
        # _add_margins raises KeyError: Level None not found
 | 
			
		||||
        # when margins=True and dropna=False
 | 
			
		||||
        df = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
 | 
			
		||||
        actual = crosstab(df.a, df.b, margins=True, dropna=False)
 | 
			
		||||
        expected = DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
 | 
			
		||||
        expected.index = Index([1.0, 2.0, "All"], name="a")
 | 
			
		||||
        expected.columns = Index([3, 4, "All"], name="b")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_dropna5(self):
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
 | 
			
		||||
        )
 | 
			
		||||
        actual = crosstab(df.a, df.b, margins=True, dropna=False)
 | 
			
		||||
        expected = DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
 | 
			
		||||
        expected.index = Index([1.0, 2.0, "All"], name="a")
 | 
			
		||||
        expected.columns = Index([3.0, 4.0, "All"], name="b")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_dropna6(self):
 | 
			
		||||
        a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
 | 
			
		||||
        b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
 | 
			
		||||
        c = np.array(
 | 
			
		||||
            ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        actual = crosstab(
 | 
			
		||||
            a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False
 | 
			
		||||
        )
 | 
			
		||||
        m = MultiIndex.from_arrays(
 | 
			
		||||
            [
 | 
			
		||||
                ["one", "one", "two", "two", "All"],
 | 
			
		||||
                ["dull", "shiny", "dull", "shiny", ""],
 | 
			
		||||
            ],
 | 
			
		||||
            names=["b", "c"],
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m
 | 
			
		||||
        )
 | 
			
		||||
        expected.index = Index(["bar", "foo", "All"], name="a")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
        actual = crosstab(
 | 
			
		||||
            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False
 | 
			
		||||
        )
 | 
			
		||||
        m = MultiIndex.from_arrays(
 | 
			
		||||
            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
 | 
			
		||||
            names=["a", "b"],
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m
 | 
			
		||||
        )
 | 
			
		||||
        expected.columns = Index(["dull", "shiny", "All"], name="c")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
        actual = crosstab(
 | 
			
		||||
            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True
 | 
			
		||||
        )
 | 
			
		||||
        m = MultiIndex.from_arrays(
 | 
			
		||||
            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
 | 
			
		||||
            names=["a", "b"],
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m
 | 
			
		||||
        )
 | 
			
		||||
        expected.columns = Index(["dull", "shiny", "All"], name="c")
 | 
			
		||||
        tm.assert_frame_equal(actual, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_normalize(self):
 | 
			
		||||
        # Issue 12578
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        rindex = Index([1, 2], name="a")
 | 
			
		||||
        cindex = Index([3, 4], name="b")
 | 
			
		||||
        full_normal = DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex)
 | 
			
		||||
        row_normal = DataFrame([[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex)
 | 
			
		||||
        col_normal = DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex)
 | 
			
		||||
 | 
			
		||||
        # Check all normalize args
 | 
			
		||||
        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="all"), full_normal)
 | 
			
		||||
        tm.assert_frame_equal(crosstab(df.a, df.b, normalize=True), full_normal)
 | 
			
		||||
        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="index"), row_normal)
 | 
			
		||||
        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="columns"), col_normal)
 | 
			
		||||
        tm.assert_frame_equal(
 | 
			
		||||
            crosstab(df.a, df.b, normalize=1),
 | 
			
		||||
            crosstab(df.a, df.b, normalize="columns"),
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(
 | 
			
		||||
            crosstab(df.a, df.b, normalize=0), crosstab(df.a, df.b, normalize="index")
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        row_normal_margins = DataFrame(
 | 
			
		||||
            [[1.0, 0], [0.25, 0.75], [0.4, 0.6]],
 | 
			
		||||
            index=Index([1, 2, "All"], name="a", dtype="object"),
 | 
			
		||||
            columns=Index([3, 4], name="b", dtype="object"),
 | 
			
		||||
        )
 | 
			
		||||
        col_normal_margins = DataFrame(
 | 
			
		||||
            [[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
 | 
			
		||||
            index=Index([1, 2], name="a", dtype="object"),
 | 
			
		||||
            columns=Index([3, 4, "All"], name="b", dtype="object"),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        all_normal_margins = DataFrame(
 | 
			
		||||
            [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]],
 | 
			
		||||
            index=Index([1, 2, "All"], name="a", dtype="object"),
 | 
			
		||||
            columns=Index([3, 4, "All"], name="b", dtype="object"),
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(
 | 
			
		||||
            crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(
 | 
			
		||||
            crosstab(df.a, df.b, normalize="columns", margins=True), col_normal_margins
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(
 | 
			
		||||
            crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_normalize_arrays(self):
 | 
			
		||||
        # GH#12578
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # Test arrays
 | 
			
		||||
        crosstab(
 | 
			
		||||
            [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2])
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # Test with aggfunc
 | 
			
		||||
        norm_counts = DataFrame(
 | 
			
		||||
            [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]],
 | 
			
		||||
            index=Index([1, 2, "All"], name="a", dtype="object"),
 | 
			
		||||
            columns=Index([3, 4, "All"], name="b"),
 | 
			
		||||
        )
 | 
			
		||||
        test_case = crosstab(
 | 
			
		||||
            df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(test_case, norm_counts)
 | 
			
		||||
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]}
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        norm_sum = DataFrame(
 | 
			
		||||
            [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]],
 | 
			
		||||
            index=Index([1, 2, "All"], name="a", dtype="object"),
 | 
			
		||||
            columns=Index([3, 4, "All"], name="b", dtype="object"),
 | 
			
		||||
        )
 | 
			
		||||
        test_case = crosstab(
 | 
			
		||||
            df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(test_case, norm_sum)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_with_empties(self, using_array_manager):
 | 
			
		||||
        # Check handling of empties
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {
 | 
			
		||||
                "a": [1, 2, 2, 2, 2],
 | 
			
		||||
                "b": [3, 3, 4, 4, 4],
 | 
			
		||||
                "c": [np.nan, np.nan, np.nan, np.nan, np.nan],
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        empty = DataFrame(
 | 
			
		||||
            [[0.0, 0.0], [0.0, 0.0]],
 | 
			
		||||
            index=Index([1, 2], name="a", dtype="int64"),
 | 
			
		||||
            columns=Index([3, 4], name="b"),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        for i in [True, "index", "columns"]:
 | 
			
		||||
            calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=i)
 | 
			
		||||
            tm.assert_frame_equal(empty, calculated)
 | 
			
		||||
 | 
			
		||||
        nans = DataFrame(
 | 
			
		||||
            [[0.0, np.nan], [0.0, 0.0]],
 | 
			
		||||
            index=Index([1, 2], name="a", dtype="int64"),
 | 
			
		||||
            columns=Index([3, 4], name="b"),
 | 
			
		||||
        )
 | 
			
		||||
        if using_array_manager:
 | 
			
		||||
            # INFO(ArrayManager) column without NaNs can preserve int dtype
 | 
			
		||||
            nans[3] = nans[3].astype("int64")
 | 
			
		||||
 | 
			
		||||
        calculated = crosstab(df.a, df.b, values=df.c, aggfunc="count", normalize=False)
 | 
			
		||||
        tm.assert_frame_equal(nans, calculated)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_errors(self):
        """Invalid argument combinations raise ValueError (issue 12578)."""
        df = DataFrame(
            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
        )

        # (expected message, extra keyword arguments), checked in this order.
        cases = [
            ("values cannot be used without an aggfunc.", {"values": df.c}),
            ("aggfunc cannot be used without values", {"aggfunc": np.mean}),
            ("Not a valid normalize argument", {"normalize": "42"}),
            ("Not a valid normalize argument", {"normalize": 42}),
            ("Not a valid margins argument", {"normalize": "all", "margins": 42}),
        ]
        for error, extra in cases:
            with pytest.raises(ValueError, match=error):
                crosstab(df.a, df.b, **extra)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_with_categorial_columns(self):
 | 
			
		||||
        # GH 8860
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {
 | 
			
		||||
                "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"],
 | 
			
		||||
                "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"],
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
        categories = ["Sedan", "Electric", "Pickup"]
 | 
			
		||||
        df["MODEL"] = df["MODEL"].astype("category").cat.set_categories(categories)
 | 
			
		||||
        result = crosstab(df["MAKE"], df["MODEL"])
 | 
			
		||||
 | 
			
		||||
        expected_index = Index(["Acura", "Honda", "Tesla"], name="MAKE")
 | 
			
		||||
        expected_columns = CategoricalIndex(
 | 
			
		||||
            categories, categories=categories, ordered=False, name="MODEL"
 | 
			
		||||
        )
 | 
			
		||||
        expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]]
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            expected_data, index=expected_index, columns=expected_columns
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_with_numpy_size(self):
 | 
			
		||||
        # GH 4003
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {
 | 
			
		||||
                "A": ["one", "one", "two", "three"] * 6,
 | 
			
		||||
                "B": ["A", "B", "C"] * 8,
 | 
			
		||||
                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
 | 
			
		||||
                "D": np.random.randn(24),
 | 
			
		||||
                "E": np.random.randn(24),
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
        result = crosstab(
 | 
			
		||||
            index=[df["A"], df["B"]],
 | 
			
		||||
            columns=[df["C"]],
 | 
			
		||||
            margins=True,
 | 
			
		||||
            aggfunc=np.size,
 | 
			
		||||
            values=df["D"],
 | 
			
		||||
        )
 | 
			
		||||
        expected_index = MultiIndex(
 | 
			
		||||
            levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]],
 | 
			
		||||
            codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]],
 | 
			
		||||
            names=["A", "B"],
 | 
			
		||||
        )
 | 
			
		||||
        expected_column = Index(["bar", "foo", "All"], dtype="object", name="C")
 | 
			
		||||
        expected_data = np.array(
 | 
			
		||||
            [
 | 
			
		||||
                [2.0, 2.0, 4.0],
 | 
			
		||||
                [2.0, 2.0, 4.0],
 | 
			
		||||
                [2.0, 2.0, 4.0],
 | 
			
		||||
                [2.0, np.nan, 2.0],
 | 
			
		||||
                [np.nan, 2.0, 2.0],
 | 
			
		||||
                [2.0, np.nan, 2.0],
 | 
			
		||||
                [np.nan, 2.0, 2.0],
 | 
			
		||||
                [2.0, np.nan, 2.0],
 | 
			
		||||
                [np.nan, 2.0, 2.0],
 | 
			
		||||
                [12.0, 12.0, 24.0],
 | 
			
		||||
            ]
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            expected_data, index=expected_index, columns=expected_column
 | 
			
		||||
        )
 | 
			
		||||
        # aggfunc is np.size, resulting in integers
 | 
			
		||||
        expected["All"] = expected["All"].astype("int64")
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_duplicate_names(self):
 | 
			
		||||
        # GH 13279 / 22529
 | 
			
		||||
 | 
			
		||||
        s1 = Series(range(3), name="foo")
 | 
			
		||||
        s2_foo = Series(range(1, 4), name="foo")
 | 
			
		||||
        s2_bar = Series(range(1, 4), name="bar")
 | 
			
		||||
        s3 = Series(range(3), name="waldo")
 | 
			
		||||
 | 
			
		||||
        # check result computed with duplicate labels against
 | 
			
		||||
        # result computed with unique labels, then relabelled
 | 
			
		||||
        mapper = {"bar": "foo"}
 | 
			
		||||
 | 
			
		||||
        # duplicate row, column labels
 | 
			
		||||
        result = crosstab(s1, s2_foo)
 | 
			
		||||
        expected = crosstab(s1, s2_bar).rename_axis(columns=mapper, axis=1)
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
        # duplicate row, unique column labels
 | 
			
		||||
        result = crosstab([s1, s2_foo], s3)
 | 
			
		||||
        expected = crosstab([s1, s2_bar], s3).rename_axis(index=mapper, axis=0)
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
        # unique row, duplicate column labels
 | 
			
		||||
        result = crosstab(s3, [s1, s2_foo])
 | 
			
		||||
        expected = crosstab(s3, [s1, s2_bar]).rename_axis(columns=mapper, axis=1)
 | 
			
		||||
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]])
    def test_crosstab_tuple_name(self, names):
        """One of the two key Series is named with a tuple."""
        row_name, col_name = names[0], names[1]
        rows = Series(range(3), name=row_name)
        cols = Series(range(1, 4), name=col_name)

        # Expected: one hit per (row, col) pair, zeros elsewhere.
        pairs = MultiIndex.from_arrays([range(3), range(1, 4)], names=names)
        expected = Series(1, index=pairs).unstack(1, fill_value=0)

        result = crosstab(rows, cols)
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_both_tuple_names(self):
 | 
			
		||||
        # GH 18321
 | 
			
		||||
        s1 = Series(range(3), name=("a", "b"))
 | 
			
		||||
        s2 = Series(range(3), name=("c", "d"))
 | 
			
		||||
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            np.eye(3, dtype="int64"),
 | 
			
		||||
            index=Index(range(3), name=("a", "b")),
 | 
			
		||||
            columns=Index(range(3), name=("c", "d")),
 | 
			
		||||
        )
 | 
			
		||||
        result = crosstab(s1, s2)
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_unsorted_order(self):
 | 
			
		||||
        df = DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"])
 | 
			
		||||
        result = crosstab(df.index, [df.b, df.a])
 | 
			
		||||
        e_idx = Index(["A", "B", "C"], name="row_0")
 | 
			
		||||
        e_columns = MultiIndex.from_tuples([(1, 4), (2, 6), (3, 5)], names=["b", "a"])
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_crosstab_normalize_multiple_columns(self):
 | 
			
		||||
        # GH 15150
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {
 | 
			
		||||
                "A": ["one", "one", "two", "three"] * 6,
 | 
			
		||||
                "B": ["A", "B", "C"] * 8,
 | 
			
		||||
                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
 | 
			
		||||
                "D": [0] * 24,
 | 
			
		||||
                "E": [0] * 24,
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
        result = crosstab(
 | 
			
		||||
            [df.A, df.B],
 | 
			
		||||
            df.C,
 | 
			
		||||
            values=df.D,
 | 
			
		||||
            aggfunc=np.sum,
 | 
			
		||||
            normalize=True,
 | 
			
		||||
            margins=True,
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            np.array([0] * 29 + [1], dtype=float).reshape(10, 3),
 | 
			
		||||
            columns=Index(["bar", "foo", "All"], dtype="object", name="C"),
 | 
			
		||||
            index=MultiIndex.from_tuples(
 | 
			
		||||
                [
 | 
			
		||||
                    ("one", "A"),
 | 
			
		||||
                    ("one", "B"),
 | 
			
		||||
                    ("one", "C"),
 | 
			
		||||
                    ("three", "A"),
 | 
			
		||||
                    ("three", "B"),
 | 
			
		||||
                    ("three", "C"),
 | 
			
		||||
                    ("two", "A"),
 | 
			
		||||
                    ("two", "B"),
 | 
			
		||||
                    ("two", "C"),
 | 
			
		||||
                    ("All", ""),
 | 
			
		||||
                ],
 | 
			
		||||
                names=["A", "B"],
 | 
			
		||||
            ),
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_normalize(self):
 | 
			
		||||
        # GH 27500
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {
 | 
			
		||||
                "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
 | 
			
		||||
                "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
 | 
			
		||||
                "C": [
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "large",
 | 
			
		||||
                    "large",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "large",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "large",
 | 
			
		||||
                ],
 | 
			
		||||
                "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
 | 
			
		||||
                "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
        # normalize on index
 | 
			
		||||
        result = crosstab(
 | 
			
		||||
            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]]
 | 
			
		||||
        )
 | 
			
		||||
        expected.index = MultiIndex(
 | 
			
		||||
            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
 | 
			
		||||
            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
 | 
			
		||||
            names=["A", "B"],
 | 
			
		||||
        )
 | 
			
		||||
        expected.columns = Index(["large", "small"], dtype="object", name="C")
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
        # normalize on columns
 | 
			
		||||
        result = crosstab(
 | 
			
		||||
            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [
 | 
			
		||||
                [0.25, 0.2, 0.222222],
 | 
			
		||||
                [0.25, 0.2, 0.222222],
 | 
			
		||||
                [0.5, 0.2, 0.333333],
 | 
			
		||||
                [0, 0.4, 0.222222],
 | 
			
		||||
            ]
 | 
			
		||||
        )
 | 
			
		||||
        expected.columns = Index(
 | 
			
		||||
            ["large", "small", "Sub-Total"], dtype="object", name="C"
 | 
			
		||||
        )
 | 
			
		||||
        expected.index = MultiIndex(
 | 
			
		||||
            levels=[["bar", "foo"], ["one", "two"]],
 | 
			
		||||
            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
 | 
			
		||||
            names=["A", "B"],
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
        # normalize on both index and column
 | 
			
		||||
        result = crosstab(
 | 
			
		||||
            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [
 | 
			
		||||
                [0.111111, 0.111111, 0.222222],
 | 
			
		||||
                [0.111111, 0.111111, 0.222222],
 | 
			
		||||
                [0.222222, 0.111111, 0.333333],
 | 
			
		||||
                [0.000000, 0.222222, 0.222222],
 | 
			
		||||
                [0.444444, 0.555555, 1],
 | 
			
		||||
            ]
 | 
			
		||||
        )
 | 
			
		||||
        expected.columns = Index(
 | 
			
		||||
            ["large", "small", "Sub-Total"], dtype="object", name="C"
 | 
			
		||||
        )
 | 
			
		||||
        expected.index = MultiIndex(
 | 
			
		||||
            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
 | 
			
		||||
            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
 | 
			
		||||
            names=["A", "B"],
 | 
			
		||||
        )
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    def test_margin_normalize_multiple_columns(self):
 | 
			
		||||
        # GH 35144
 | 
			
		||||
        # use multiple columns with margins and normalization
 | 
			
		||||
        df = DataFrame(
 | 
			
		||||
            {
 | 
			
		||||
                "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
 | 
			
		||||
                "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
 | 
			
		||||
                "C": [
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "large",
 | 
			
		||||
                    "large",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "large",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "small",
 | 
			
		||||
                    "large",
 | 
			
		||||
                ],
 | 
			
		||||
                "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
 | 
			
		||||
                "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
 | 
			
		||||
            }
 | 
			
		||||
        )
 | 
			
		||||
        result = crosstab(
 | 
			
		||||
            index=df.C,
 | 
			
		||||
            columns=[df.A, df.B],
 | 
			
		||||
            margins=True,
 | 
			
		||||
            margins_name="margin",
 | 
			
		||||
            normalize=True,
 | 
			
		||||
        )
 | 
			
		||||
        expected = DataFrame(
 | 
			
		||||
            [
 | 
			
		||||
                [0.111111, 0.111111, 0.222222, 0.000000, 0.444444],
 | 
			
		||||
                [0.111111, 0.111111, 0.111111, 0.222222, 0.555556],
 | 
			
		||||
                [0.222222, 0.222222, 0.333333, 0.222222, 1.0],
 | 
			
		||||
            ],
 | 
			
		||||
            index=["large", "small", "margin"],
 | 
			
		||||
        )
 | 
			
		||||
        expected.columns = MultiIndex(
 | 
			
		||||
            levels=[["bar", "foo", "margin"], ["", "one", "two"]],
 | 
			
		||||
            codes=[[0, 0, 1, 1, 2], [1, 2, 1, 2, 0]],
 | 
			
		||||
            names=["A", "B"],
 | 
			
		||||
        )
 | 
			
		||||
        expected.index.name = "C"
 | 
			
		||||
        tm.assert_frame_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("a_dtype", ["category", "int64"])
@pytest.mark.parametrize("b_dtype", ["category", "int64"])
def test_categoricals(a_dtype, b_dtype):
    """Check crosstab margins for categorical and integer inputs.

    Parameters
    ----------
    a_dtype, b_dtype : str
        Dtype ("category" or "int64") the row and column Series are cast to.
    """
    # https://github.com/pandas-dev/pandas/issues/37465
    g = np.random.RandomState(25982704)  # fixed seed keeps the counts stable
    a = Series(g.randint(0, 3, size=100)).astype(a_dtype)
    b = Series(g.randint(0, 2, size=100)).astype(b_dtype)
    result = crosstab(a, b, margins=True, dropna=False)
    columns = Index([0, 1, "All"], dtype="object", name="col_0")
    index = Index([0, 1, 2, "All"], dtype="object", name="row_0")
    values = [[18, 16, 34], [18, 16, 34], [16, 16, 32], [52, 48, 100]]
    expected = DataFrame(values, index, columns)
    tm.assert_frame_equal(result, expected)

    # Verify when categorical does not have all values present
    a.loc[a == 1] = 2
    # isinstance on the dtype replaces ``is_categorical_dtype``, which is
    # deprecated since pandas 2.1; the check is equivalent for a dtype object.
    a_is_cat = isinstance(a.dtype, pd.CategoricalDtype)
    # For a categorical, value 1 must still be counted (as zero occurrences).
    assert not a_is_cat or a.value_counts().loc[1] == 0
    result = crosstab(a, b, margins=True, dropna=False)
    values = [[18, 16, 34], [0, 0, 0], [34, 32, 66], [52, 48, 100]]
    expected = DataFrame(values, index, columns)
    if not a_is_cat:
        # Non-categorical input has no row for the now-absent value 1.
        expected = expected.loc[[0, 2, "All"]]
        expected["All"] = expected["All"].astype("int64")
    # NOTE(review): presumably smoke checks that repr does not raise on
    # these frames -- confirm before removing.
    repr(result)
    repr(expected)
    repr(expected.loc[[0, 2, "All"]])
    tm.assert_frame_equal(result, expected)
 | 
			
		||||
		Reference in New Issue
	
	Block a user