针对pulse-transit的工具

2025-02-22 16:12:02 +08:00
commit 6bc25b4e3a
7719 changed files with 1530886 additions and 0 deletions
--- a/dist/client/pandas/tests/reshape/__init__.py
+++ b/dist/client/pandas/tests/reshape/__init__.py
--- a/dist/client/pandas/tests/reshape/__pycache__/__init__.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/__init__.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_crosstab.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_crosstab.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_cut.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_cut.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_get_dummies.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_get_dummies.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_melt.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_melt.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_pivot.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_pivot.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_pivot_multilevel.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_pivot_multilevel.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_qcut.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_qcut.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_union_categoricals.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_union_categoricals.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/__pycache__/test_util.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/__pycache__/test_util.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__init__.py
+++ b/dist/client/pandas/tests/reshape/concat/__init__.py
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/__init__.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/__init__.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/conftest.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/conftest.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_append.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_append.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_append_common.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_append_common.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_categorical.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_categorical.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_concat.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_concat.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_dataframe.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_dataframe.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_datetimes.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_datetimes.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_empty.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_empty.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_index.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_index.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_invalid.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_invalid.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_series.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_series.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/__pycache__/test_sort.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/concat/__pycache__/test_sort.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/concat/conftest.py
+++ b/dist/client/pandas/tests/reshape/concat/conftest.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.fixture(params=[True, False])
+def sort(request):
+    """Boolean sort keyword for concat and DataFrame.append."""
+    return request.param
--- a/dist/client/pandas/tests/reshape/concat/test_append.py
+++ b/dist/client/pandas/tests/reshape/concat/test_append.py
@@ -0,0 +1,378 @@
+import datetime as dt
+from itertools import combinations
+
+import dateutil
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    Series,
+    Timestamp,
+    concat,
+    isna,
+)
+import pandas._testing as tm
+
+
+class TestAppend:
+    def test_append(self, sort, float_frame):
+        mixed_frame = float_frame.copy()
+        mixed_frame["foo"] = "bar"
+
+        begin_index = float_frame.index[:5]
+        end_index = float_frame.index[5:]
+
+        begin_frame = float_frame.reindex(begin_index)
+        end_frame = float_frame.reindex(end_index)
+
+        appended = begin_frame._append(end_frame)
+        tm.assert_almost_equal(appended["A"], float_frame["A"])
+
+        del end_frame["A"]
+        partial_appended = begin_frame._append(end_frame, sort=sort)
+        assert "A" in partial_appended
+
+        partial_appended = end_frame._append(begin_frame, sort=sort)
+        assert "A" in partial_appended
+
+        # mixed dtypes: re-appending the two halves of mixed_frame must restore it
+        appended = mixed_frame[:5]._append(mixed_frame[5:])
+        tm.assert_frame_equal(appended, mixed_frame)
+
+        # append a mixed-dtype half and an all-float half, in both orders
+        mixed_appended = mixed_frame[:5]._append(float_frame[5:], sort=sort)
+        mixed_appended2 = float_frame[:5]._append(mixed_frame[5:], sort=sort)
+
+        # compare only columns A-D; the 'foo' column is left out of the check
+        tm.assert_frame_equal(
+            mixed_appended.reindex(columns=["A", "B", "C", "D"]),
+            mixed_appended2.reindex(columns=["A", "B", "C", "D"]),
+        )
+
+    def test_append_empty(self, float_frame):
+        empty = DataFrame()
+
+        appended = float_frame._append(empty)
+        tm.assert_frame_equal(float_frame, appended)
+        assert appended is not float_frame
+
+        appended = empty._append(float_frame)
+        tm.assert_frame_equal(float_frame, appended)
+        assert appended is not float_frame
+
+    def test_append_overlap_raises(self, float_frame):
+        msg = "Indexes have overlapping values"
+        with pytest.raises(ValueError, match=msg):
+            float_frame._append(float_frame, verify_integrity=True)
+
+    def test_append_new_columns(self):
+        # see gh-6129: new columns
+        df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}})
+        row = Series([5, 6, 7], index=["a", "b", "c"], name="z")
+        expected = DataFrame(
+            {
+                "a": {"x": 1, "y": 2, "z": 5},
+                "b": {"x": 3, "y": 4, "z": 6},
+                "c": {"z": 7},
+            }
+        )
+        result = df._append(row)
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_length0_frame(self, sort):
+        df = DataFrame(columns=["A", "B", "C"])
+        df3 = DataFrame(index=[0, 1], columns=["A", "B"])
+        df5 = df._append(df3, sort=sort)
+
+        expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
+        tm.assert_frame_equal(df5, expected)
+
+    def test_append_records(self):
+        arr1 = np.zeros((2,), dtype=("i4,f4,a10"))
+        arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
+
+        arr2 = np.zeros((3,), dtype=("i4,f4,a10"))
+        arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")]
+
+        df1 = DataFrame(arr1)
+        df2 = DataFrame(arr2)
+
+        result = df1._append(df2, ignore_index=True)
+        expected = DataFrame(np.concatenate((arr1, arr2)))
+        tm.assert_frame_equal(result, expected)
+
+    # NOTE: the shared sort fixture only yields True/False; sort=None is not exercised here
+    def test_append_sorts(self, sort):
+        df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
+        df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])
+
+        result = df1._append(df2, sort=sort)
+
+        # expected for sort=True: columns in sorted order (a, b, c)
+        expected = DataFrame(
+            {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]},
+            columns=["a", "b", "c"],
+        )
+        if sort is False:
+            expected = expected[["b", "a", "c"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_different_columns(self, sort):
+        df = DataFrame(
+            {
+                "bools": np.random.randn(10) > 0,
+                "ints": np.random.randint(0, 10, 10),
+                "floats": np.random.randn(10),
+                "strings": ["foo", "bar"] * 5,
+            }
+        )
+
+        a = df[:5].loc[:, ["bools", "ints", "floats"]]
+        b = df[5:].loc[:, ["strings", "ints", "floats"]]
+
+        appended = a._append(b, sort=sort)
+        assert isna(appended["strings"][0:4]).all()
+        assert isna(appended["bools"][5:]).all()
+
+    def test_append_many(self, sort, float_frame):
+        chunks = [
+            float_frame[:5],
+            float_frame[5:10],
+            float_frame[10:15],
+            float_frame[15:],
+        ]
+
+        result = chunks[0]._append(chunks[1:])
+        tm.assert_frame_equal(result, float_frame)
+
+        chunks[-1] = chunks[-1].copy()
+        chunks[-1]["foo"] = "bar"
+        result = chunks[0]._append(chunks[1:], sort=sort)
+        tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
+        assert (result["foo"][15:] == "bar").all()
+        assert result["foo"][:15].isna().all()
+
+    def test_append_preserve_index_name(self):
+        # #980
+        df1 = DataFrame(columns=["A", "B", "C"])
+        df1 = df1.set_index(["A"])
+        df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
+        df2 = df2.set_index(["A"])
+
+        result = df1._append(df2)
+        assert result.index.name == "A"
+
+    indexes_can_append = [
+        pd.RangeIndex(3),
+        Index([4, 5, 6]),
+        Index([4.5, 5.5, 6.5]),
+        Index(list("abc")),
+        pd.CategoricalIndex("A B C".split()),
+        pd.CategoricalIndex("D E F".split(), ordered=True),
+        pd.IntervalIndex.from_breaks([7, 8, 9, 10]),
+        pd.DatetimeIndex(
+            [
+                dt.datetime(2013, 1, 3, 0, 0),
+                dt.datetime(2013, 1, 3, 6, 10),
+                dt.datetime(2013, 1, 3, 7, 12),
+            ]
+        ),
+        pd.MultiIndex.from_arrays(["A B C".split(), "D E F".split()]),
+    ]
+
+    @pytest.mark.parametrize(
+        "index", indexes_can_append, ids=lambda x: type(x).__name__
+    )
+    def test_append_same_columns_type(self, index):
+        # GH18359
+
+        # case 1: the frame has more columns than the appended Series has labels
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
+        ser_index = index[:2]
+        ser = Series([7, 8], index=ser_index, name=2)
+        result = df._append(ser)
+        expected = DataFrame(
+            [[1, 2, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
+        )
+        # sanity-check the expected frame: its first two columns are integer dtype
+        assert expected.dtypes.iloc[0].kind == "i"
+        assert expected.dtypes.iloc[1].kind == "i"
+
+        tm.assert_frame_equal(result, expected)
+
+        # case 2: the appended Series has more labels than the frame has columns
+        ser_index = index
+        index = index[:2]
+        df = DataFrame([[1, 2], [4, 5]], columns=index)
+        ser = Series([7, 8, 9], index=ser_index, name=2)
+        result = df._append(ser)
+        expected = DataFrame(
+            [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
+            index=[0, 1, 2],
+            columns=ser_index,
+        )
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "df_columns, series_index",
+        combinations(indexes_can_append, r=2),
+        ids=lambda x: type(x).__name__,
+    )
+    def test_append_different_columns_types(self, df_columns, series_index):
+        # GH18359
+        # NOTE(review): the error-case companion this comment used to point to,
+        # 'test_append_different_columns_types_raises', is not defined in this class
+
+        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
+        ser = Series([7, 8, 9], index=series_index, name=2)
+
+        result = df._append(ser)
+        idx_diff = ser.index.difference(df_columns)
+        combined_columns = Index(df_columns.tolist()).append(idx_diff)
+        expected = DataFrame(
+            [
+                [1.0, 2.0, 3.0, np.nan, np.nan, np.nan],
+                [4, 5, 6, np.nan, np.nan, np.nan],
+                [np.nan, np.nan, np.nan, 7, 8, 9],
+            ],
+            index=[0, 1, 2],
+            columns=combined_columns,
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_dtype_coerce(self, sort):
+
+        # GH 4993
+        # appending with datetime will incorrectly convert datetime64
+
+        df1 = DataFrame(
+            index=[1, 2],
+            data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)],
+            columns=["start_time"],
+        )
+        df2 = DataFrame(
+            index=[4, 5],
+            data=[
+                [dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)],
+                [dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)],
+            ],
+            columns=["start_time", "end_time"],
+        )
+
+        expected = concat(
+            [
+                Series(
+                    [
+                        pd.NaT,
+                        pd.NaT,
+                        dt.datetime(2013, 1, 3, 6, 10),
+                        dt.datetime(2013, 1, 4, 7, 10),
+                    ],
+                    name="end_time",
+                ),
+                Series(
+                    [
+                        dt.datetime(2013, 1, 1, 0, 0),
+                        dt.datetime(2013, 1, 2, 0, 0),
+                        dt.datetime(2013, 1, 3, 0, 0),
+                        dt.datetime(2013, 1, 4, 0, 0),
+                    ],
+                    name="start_time",
+                ),
+            ],
+            axis=1,
+            sort=sort,
+        )
+        result = df1._append(df2, ignore_index=True, sort=sort)
+        if sort:
+            expected = expected[["end_time", "start_time"]]
+        else:
+            expected = expected[["start_time", "end_time"]]
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_missing_column_proper_upcast(self, sort):
+        df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
+        df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})
+
+        appended = df1._append(df2, ignore_index=True, sort=sort)
+        assert appended["A"].dtype == "f8"
+        assert appended["B"].dtype == "O"
+
+    def test_append_empty_frame_to_series_with_dateutil_tz(self):
+        # GH 23682
+        date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
+        ser = Series({"a": 1.0, "b": 2.0, "date": date})
+        df = DataFrame(columns=["c", "d"])
+        result_a = df._append(ser, ignore_index=True)
+        expected = DataFrame(
+            [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
+        )
+        # These columns get cast to object after append
+        expected["c"] = expected["c"].astype(object)
+        expected["d"] = expected["d"].astype(object)
+        tm.assert_frame_equal(result_a, expected)
+
+        expected = DataFrame(
+            [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
+        )
+        expected["c"] = expected["c"].astype(object)
+        expected["d"] = expected["d"].astype(object)
+        result_b = result_a._append(ser, ignore_index=True)
+        tm.assert_frame_equal(result_b, expected)
+
+        result = df._append([ser, ser], ignore_index=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_append_empty_tz_frame_with_datetime64ns(self):
+        # https://github.com/pandas-dev/pandas/issues/35460
+        df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
+
+        # appending a bare pd.NaT: the expected result below is object dtype
+        result = df._append({"a": pd.NaT}, ignore_index=True)
+        expected = DataFrame({"a": [pd.NaT]}).astype(object)
+        tm.assert_frame_equal(result, expected)
+
+        # same expectation when the appended value is a tz-naive datetime64[ns] Series
+        df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
+        other = Series({"a": pd.NaT}, dtype="datetime64[ns]")
+        result = df._append(other, ignore_index=True)
+        expected = DataFrame({"a": [pd.NaT]}).astype(object)
+        tm.assert_frame_equal(result, expected)
+
+        # same expectation when the appended Series has a different tz (US/Pacific)
+        other = Series({"a": pd.NaT}, dtype="datetime64[ns, US/Pacific]")
+        result = df._append(other, ignore_index=True)
+        expected = DataFrame({"a": [pd.NaT]}).astype(object)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
+    )
+    @pytest.mark.parametrize("val", [1, "NaT"])
+    def test_append_empty_frame_with_timedelta64ns_nat(self, dtype_str, val):
+        # https://github.com/pandas-dev/pandas/issues/35460
+        df = DataFrame(columns=["a"]).astype(dtype_str)
+
+        other = DataFrame({"a": [np.timedelta64(val, "ns")]})
+        result = df._append(other, ignore_index=True)
+
+        expected = other.astype(object)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "dtype_str", ["datetime64[ns, UTC]", "datetime64[ns]", "Int64", "int64"]
+    )
+    @pytest.mark.parametrize("val", [1, "NaT"])
+    def test_append_frame_with_timedelta64ns_nat(self, dtype_str, val):
+        # https://github.com/pandas-dev/pandas/issues/35460
+        df = DataFrame({"a": pd.array([1], dtype=dtype_str)})
+
+        other = DataFrame({"a": [np.timedelta64(val, "ns")]})
+        result = df._append(other, ignore_index=True)
+
+        expected = DataFrame({"a": [df.iloc[0, 0], other.iloc[0, 0]]}, dtype=object)
+        tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/concat/test_append_common.py
+++ b/dist/client/pandas/tests/reshape/concat/test_append_common.py
@@ -0,0 +1,765 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DataFrame,
+    Index,
+    Series,
+)
+import pandas._testing as tm
+
+dt_data = [
+    pd.Timestamp("2011-01-01"),
+    pd.Timestamp("2011-01-02"),
+    pd.Timestamp("2011-01-03"),
+]
+tz_data = [
+    pd.Timestamp("2011-01-01", tz="US/Eastern"),
+    pd.Timestamp("2011-01-02", tz="US/Eastern"),
+    pd.Timestamp("2011-01-03", tz="US/Eastern"),
+]
+td_data = [
+    pd.Timedelta("1 days"),
+    pd.Timedelta("2 days"),
+    pd.Timedelta("3 days"),
+]
+period_data = [
+    pd.Period("2011-01", freq="M"),
+    pd.Period("2011-02", freq="M"),
+    pd.Period("2011-03", freq="M"),
+]
+data_dict = {
+    "bool": [True, False, True],
+    "int64": [1, 2, 3],
+    "float64": [1.1, np.nan, 3.3],
+    "category": Categorical(["X", "Y", "Z"]),
+    "object": ["a", "b", "c"],
+    "datetime64[ns]": dt_data,
+    "datetime64[ns, US/Eastern]": tz_data,
+    "timedelta64[ns]": td_data,
+    "period[M]": period_data,
+}
+
+
+class TestConcatAppendCommon:
+    """
+    Test common dtype coercion rules between concat and append.
+    """
+
+    @pytest.fixture(params=sorted(data_dict.keys()))
+    def item(self, request):
+        key = request.param
+        return key, data_dict[key]
+
+    item2 = item
+
+    def _check_expected_dtype(self, obj, label):
+        """
+        Check whether obj has expected dtype depending on label
+        considering not-supported dtypes
+        """
+        if isinstance(obj, Index):
+            if label == "bool":
+                assert obj.dtype == "object"
+            else:
+                assert obj.dtype == label
+        elif isinstance(obj, Series):
+            if label.startswith("period"):
+                assert obj.dtype == "Period[M]"
+            else:
+                assert obj.dtype == label
+        else:
+            raise ValueError
+
+    def test_dtypes(self, item):
+        # to confirm test case covers intended dtypes
+        typ, vals = item
+        self._check_expected_dtype(Index(vals), typ)
+        self._check_expected_dtype(Series(vals), typ)
+
+    def test_concatlike_same_dtypes(self, item):
+        # GH 13660
+        typ1, vals1 = item
+
+        vals2 = vals1
+        vals3 = vals1
+
+        if typ1 == "category":
+            exp_data = Categorical(list(vals1) + list(vals2))
+            exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
+        else:
+            exp_data = vals1 + vals2
+            exp_data3 = vals1 + vals2 + vals3
+
+        # ----- Index ----- #
+
+        # index.append
+        res = Index(vals1).append(Index(vals2))
+        exp = Index(exp_data)
+        tm.assert_index_equal(res, exp)
+
+        # 3 elements
+        res = Index(vals1).append([Index(vals2), Index(vals3)])
+        exp = Index(exp_data3)
+        tm.assert_index_equal(res, exp)
+
+        # index.append name mismatch
+        i1 = Index(vals1, name="x")
+        i2 = Index(vals2, name="y")
+        res = i1.append(i2)
+        exp = Index(exp_data)
+        tm.assert_index_equal(res, exp)
+
+        # index.append name match
+        i1 = Index(vals1, name="x")
+        i2 = Index(vals2, name="x")
+        res = i1.append(i2)
+        exp = Index(exp_data, name="x")
+        tm.assert_index_equal(res, exp)
+
+        # cannot append non-index
+        with pytest.raises(TypeError, match="all inputs must be Index"):
+            Index(vals1).append(vals2)
+
+        with pytest.raises(TypeError, match="all inputs must be Index"):
+            Index(vals1).append([Index(vals2), vals3])
+
+        # ----- Series ----- #
+
+        # series.append
+        res = Series(vals1)._append(Series(vals2), ignore_index=True)
+        exp = Series(exp_data)
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        # concat
+        res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        # 3 elements
+        res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
+        exp = Series(exp_data3)
+        tm.assert_series_equal(res, exp)
+
+        res = pd.concat(
+            [Series(vals1), Series(vals2), Series(vals3)],
+            ignore_index=True,
+        )
+        tm.assert_series_equal(res, exp)
+
+        # name mismatch
+        s1 = Series(vals1, name="x")
+        s2 = Series(vals2, name="y")
+        res = s1._append(s2, ignore_index=True)
+        exp = Series(exp_data)
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        res = pd.concat([s1, s2], ignore_index=True)
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        # name match
+        s1 = Series(vals1, name="x")
+        s2 = Series(vals2, name="x")
+        res = s1._append(s2, ignore_index=True)
+        exp = Series(exp_data, name="x")
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        res = pd.concat([s1, s2], ignore_index=True)
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        # cannot append/concat a raw (non-Series, non-DataFrame) object
+        msg = (
+            r"cannot concatenate object of type '.+'; "
+            "only Series and DataFrame objs are valid"
+        )
+        with pytest.raises(TypeError, match=msg):
+            Series(vals1)._append(vals2)
+
+        with pytest.raises(TypeError, match=msg):
+            Series(vals1)._append([Series(vals2), vals3])
+
+        with pytest.raises(TypeError, match=msg):
+            pd.concat([Series(vals1), vals2])
+
+        with pytest.raises(TypeError, match=msg):
+            pd.concat([Series(vals1), Series(vals2), vals3])
+
+    def test_concatlike_dtypes_coercion(self, item, item2):
+        # GH 13660
+        typ1, vals1 = item
+        typ2, vals2 = item2
+
+        vals3 = vals2
+
+        # basically infer
+        exp_index_dtype = None
+        exp_series_dtype = None
+
+        if typ1 == typ2:
+            # same dtype is tested in test_concatlike_same_dtypes
+            return
+        elif typ1 == "category" or typ2 == "category":
+            # The `vals1 + vals2` below fails bc one of these is a Categorical
+            #  instead of a list; we have separate dedicated tests for categorical
+            return
+
+        warn = None
+        # specify expected dtype
+        if typ1 == "bool" and typ2 in ("int64", "float64"):
+            # series coerces to numeric based on numpy rule
+            # index doesn't because bool is object dtype
+            exp_series_dtype = typ2
+            warn = FutureWarning
+        elif typ2 == "bool" and typ1 in ("int64", "float64"):
+            exp_series_dtype = typ1
+            warn = FutureWarning
+        elif (
+            typ1 == "datetime64[ns, US/Eastern]"
+            or typ2 == "datetime64[ns, US/Eastern]"
+            or typ1 == "timedelta64[ns]"
+            or typ2 == "timedelta64[ns]"
+        ):
+            exp_index_dtype = object
+            exp_series_dtype = object
+
+        exp_data = vals1 + vals2
+        exp_data3 = vals1 + vals2 + vals3
+
+        # ----- Index ----- #
+
+        # index.append
+        res = Index(vals1).append(Index(vals2))
+        exp = Index(exp_data, dtype=exp_index_dtype)
+        tm.assert_index_equal(res, exp)
+
+        # 3 elements
+        res = Index(vals1).append([Index(vals2), Index(vals3)])
+        exp = Index(exp_data3, dtype=exp_index_dtype)
+        tm.assert_index_equal(res, exp)
+
+        # ----- Series ----- #
+
+        # series._append
+        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
+            # GH#39817
+            res = Series(vals1)._append(Series(vals2), ignore_index=True)
+        exp = Series(exp_data, dtype=exp_series_dtype)
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        # concat
+        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
+            # GH#39817
+            res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
+        tm.assert_series_equal(res, exp, check_index_type=True)
+
+        # 3 elements
+        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
+            # GH#39817
+            res = Series(vals1)._append(
+                [Series(vals2), Series(vals3)], ignore_index=True
+            )
+        exp = Series(exp_data3, dtype=exp_series_dtype)
+        tm.assert_series_equal(res, exp)
+
+        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
+            # GH#39817
+            res = pd.concat(
+                [Series(vals1), Series(vals2), Series(vals3)],
+                ignore_index=True,
+            )
+        tm.assert_series_equal(res, exp)
+
+    def test_concatlike_common_coerce_to_pandas_object(self):
+        # GH 13626
+        # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
+        dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
+        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
+
+        exp = Index(
+            [
+                pd.Timestamp("2011-01-01"),
+                pd.Timestamp("2011-01-02"),
+                pd.Timedelta("1 days"),
+                pd.Timedelta("2 days"),
+            ]
+        )
+
+        res = dti.append(tdi)
+        tm.assert_index_equal(res, exp)
+        assert isinstance(res[0], pd.Timestamp)
+        assert isinstance(res[-1], pd.Timedelta)
+
+        dts = Series(dti)
+        tds = Series(tdi)
+        res = dts._append(tds)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+        assert isinstance(res.iloc[0], pd.Timestamp)
+        assert isinstance(res.iloc[-1], pd.Timedelta)
+
+        res = pd.concat([dts, tds])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+        assert isinstance(res.iloc[0], pd.Timestamp)
+        assert isinstance(res.iloc[-1], pd.Timedelta)
+
+    def test_concatlike_datetimetz(self, tz_aware_fixture):
+        tz = tz_aware_fixture
+        # GH 7795
+        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
+        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)
+
+        exp = pd.DatetimeIndex(
+            ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
+        )
+
+        res = dti1.append(dti2)
+        tm.assert_index_equal(res, exp)
+
+        dts1 = Series(dti1)
+        dts2 = Series(dti2)
+        res = dts1._append(dts2)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        res = pd.concat([dts1, dts2])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+    @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
+    def test_concatlike_datetimetz_short(self, tz):
+        # GH#7795
+        ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
+        ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
+        df1 = DataFrame(0, index=ix1, columns=["A", "B"])
+        df2 = DataFrame(0, index=ix2, columns=["A", "B"])
+
+        exp_idx = pd.DatetimeIndex(
+            ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
+            tz=tz,
+        )
+        exp = DataFrame(0, index=exp_idx, columns=["A", "B"])
+
+        tm.assert_frame_equal(df1._append(df2), exp)
+        tm.assert_frame_equal(pd.concat([df1, df2]), exp)
+
+    def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
+        tz = tz_aware_fixture
+        # GH 13660
+
+        # tz-aware combined with tz-naive coerces to object
+        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
+        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])
+
+        exp = Index(
+            [
+                pd.Timestamp("2011-01-01", tz=tz),
+                pd.Timestamp("2011-01-02", tz=tz),
+                pd.Timestamp("2012-01-01"),
+                pd.Timestamp("2012-01-02"),
+            ],
+            dtype=object,
+        )
+
+        res = dti1.append(dti2)
+        tm.assert_index_equal(res, exp)
+
+        dts1 = Series(dti1)
+        dts2 = Series(dti2)
+        res = dts1._append(dts2)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        res = pd.concat([dts1, dts2])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        # fixture tz combined with a US/Pacific index: expected object dtype as well
+        dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")
+
+        exp = Index(
+            [
+                pd.Timestamp("2011-01-01", tz=tz),
+                pd.Timestamp("2011-01-02", tz=tz),
+                pd.Timestamp("2012-01-01", tz="US/Pacific"),
+                pd.Timestamp("2012-01-02", tz="US/Pacific"),
+            ],
+            dtype=object,
+        )
+
+        res = dti1.append(dti3)
+        tm.assert_index_equal(res, exp)
+
+        dts1 = Series(dti1)
+        dts3 = Series(dti3)
+        res = dts1._append(dts3)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        res = pd.concat([dts1, dts3])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+    def test_concatlike_common_period(self):
+        """Assert that period data sharing freq "M" stays period-typed when
+        appended or concatenated."""
+        # GH 13660
+        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
+        pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")
+
+        exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")
+
+        res = pi1.append(pi2)
+        tm.assert_index_equal(res, exp)
+
+        # the Series variants keep each input's own 0..1 labels
+        ps1 = Series(pi1)
+        ps2 = Series(pi2)
+        res = ps1._append(ps2)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        res = pd.concat([ps1, ps2])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+    def test_concatlike_common_period_diff_freq_to_object(self):
+        """Assert that period data with different freqs ("M" and "D")
+        combines into object dtype."""
+        # GH 13221
+        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
+        pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")
+
+        # expected result holds the individual Period objects, each with its own freq
+        exp = Index(
+            [
+                pd.Period("2011-01", freq="M"),
+                pd.Period("2011-02", freq="M"),
+                pd.Period("2012-01-01", freq="D"),
+                pd.Period("2012-02-01", freq="D"),
+            ],
+            dtype=object,
+        )
+
+        res = pi1.append(pi2)
+        tm.assert_index_equal(res, exp)
+
+        ps1 = Series(pi1)
+        ps2 = Series(pi2)
+        res = ps1._append(ps2)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        res = pd.concat([ps1, ps2])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+    def test_concatlike_common_period_mixed_dt_to_object(self):
+        """Assert that period data combined with timedelta data gives object
+        dtype, whichever of the two comes first."""
+        # GH 13221
+        # different datetimelike
+        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
+        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
+        exp = Index(
+            [
+                pd.Period("2011-01", freq="M"),
+                pd.Period("2011-02", freq="M"),
+                pd.Timedelta("1 days"),
+                pd.Timedelta("2 days"),
+            ],
+            dtype=object,
+        )
+
+        res = pi1.append(tdi)
+        tm.assert_index_equal(res, exp)
+
+        ps1 = Series(pi1)
+        tds = Series(tdi)
+        res = ps1._append(tds)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        res = pd.concat([ps1, tds])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        # inverse: timedelta data first, period data second
+        exp = Index(
+            [
+                pd.Timedelta("1 days"),
+                pd.Timedelta("2 days"),
+                pd.Period("2011-01", freq="M"),
+                pd.Period("2011-02", freq="M"),
+            ],
+            dtype=object,
+        )
+
+        res = tdi.append(pi1)
+        tm.assert_index_equal(res, exp)
+
+        ps1 = Series(pi1)
+        tds = Series(tdi)
+        res = tds._append(ps1)
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+        res = pd.concat([tds, ps1])
+        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
+
+    def test_concat_categorical(self):
+        """Assert the result dtype of combining two categorical Series:
+        category is kept only when both inputs have the same categories."""
+        # GH 13524
+
+        # same categories -> category
+        s1 = Series([1, 2, np.nan], dtype="category")
+        s2 = Series([2, 1, 2], dtype="category")
+
+        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category")
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        # partially different categories => not-category
+        s1 = Series([3, 2], dtype="category")
+        s2 = Series([2, 1], dtype="category")
+
+        # expected is a plain Series built from ints (no dtype forced)
+        exp = Series([3, 2, 2, 1])
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        # completely different categories (same dtype) => not-category
+        s1 = Series([10, 11, np.nan], dtype="category")
+        s2 = Series([np.nan, 1, 3, 2], dtype="category")
+
+        exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object")
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+    def test_union_categorical_same_categories_different_order(self):
+        """Assert that concatenating categoricals with the same categories in
+        a different order stays categorical, using the first input's order."""
+        # https://github.com/pandas-dev/pandas/issues/19096
+        a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
+        b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
+        result = pd.concat([a, b], ignore_index=True)
+        expected = Series(
+            Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_categorical_coercion(self):
+        """Assert that combining a categorical Series with a non-categorical
+        one never yields category dtype, in either argument order."""
+        # GH 13524
+
+        # category + not-category => not-category
+        s1 = Series([1, 2, np.nan], dtype="category")
+        s2 = Series([2, 1, 2])
+
+        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object")
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        # result shouldn't be affected by 1st elem dtype
+        exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object")
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
+
+        # all values are not in category => not-category
+        s1 = Series([3, 2], dtype="category")
+        s2 = Series([2, 1])
+
+        exp = Series([3, 2, 2, 1])
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        exp = Series([2, 1, 3, 2])
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
+
+        # completely different categories => not-category
+        s1 = Series([10, 11, np.nan], dtype="category")
+        s2 = Series([1, 3, 2])
+
+        exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object")
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object")
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
+
+        # different dtype => not-category
+        s1 = Series([10, 11, np.nan], dtype="category")
+        s2 = Series(["a", "b", "c"])
+
+        exp = Series([10, 11, np.nan, "a", "b", "c"])
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        exp = Series(["a", "b", "c", 10, 11, np.nan])
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
+
+        # if normal series only contains NaN-likes => not-category
+        s1 = Series([10, 11], dtype="category")
+        s2 = Series([np.nan, np.nan, np.nan])
+
+        exp = Series([10, 11, np.nan, np.nan, np.nan])
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        exp = Series([np.nan, np.nan, np.nan, 10, 11])
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
+
+    def test_concat_categorical_3elem_coercion(self):
+        """Assert that combining two categorical Series with one plain Series
+        never yields category dtype, whichever input comes first."""
+        # GH 13524
+
+        # mixed dtypes => not-category
+        s1 = Series([1, 2, np.nan], dtype="category")
+        s2 = Series([2, 1, 2], dtype="category")
+        s3 = Series([1, 2, 1, 2, np.nan])
+
+        exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
+        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
+
+        exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
+        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
+
+        # values are all in either category => not-category
+        s1 = Series([4, 5, 6], dtype="category")
+        s2 = Series([1, 2, 3], dtype="category")
+        s3 = Series([1, 3, 4])
+
+        exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
+        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
+
+        exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
+        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
+
+        # values are in neither category => not-category
+        s1 = Series([4, 5, 6], dtype="category")
+        s2 = Series([1, 2, 3], dtype="category")
+        s3 = Series([10, 11, 12])
+
+        exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
+        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
+
+        exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
+        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
+
+    def test_concat_categorical_multi_coercion(self):
+        """Assert that six interleaved categorical and plain Series combine
+        into a non-categorical result, in forward and reverse order."""
+        # GH 13524
+
+        s1 = Series([1, 3], dtype="category")
+        s2 = Series([3, 4], dtype="category")
+        s3 = Series([2, 3])
+        s4 = Series([2, 2], dtype="category")
+        s5 = Series([1, np.nan])
+        s6 = Series([1, 3, 2], dtype="category")
+
+        # mixed dtype, values are all in categories => not-category
+        exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
+        res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
+        tm.assert_series_equal(res, exp)
+        res = s1._append([s2, s3, s4, s5, s6], ignore_index=True)
+        tm.assert_series_equal(res, exp)
+
+        # same inputs, reversed order
+        exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
+        res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
+        tm.assert_series_equal(res, exp)
+        res = s6._append([s5, s4, s3, s2, s1], ignore_index=True)
+        tm.assert_series_equal(res, exp)
+
+    def test_concat_categorical_ordered(self):
+        """Assert that ordered categoricals stay ordered categoricals when
+        two or three of them are combined."""
+        # GH 13524
+
+        s1 = Series(Categorical([1, 2, np.nan], ordered=True))
+        s2 = Series(Categorical([2, 1, 2], ordered=True))
+
+        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        # three inputs: s1, s2 and s1 again
+        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True))
+        tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp)
+
+    def test_concat_categorical_coercion_nan(self):
+        """Assert the result dtype when one or both inputs consist only of
+        NaN: category is kept only if every input is categorical."""
+        # GH 13524
+
+        # some edge cases
+        # category + not-category => not category
+        s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
+        s2 = Series([np.nan, 1])
+
+        exp = Series([np.nan, np.nan, np.nan, 1])
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        s1 = Series([1, np.nan], dtype="category")
+        s2 = Series([np.nan, np.nan])
+
+        exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        # mixed dtype, all nan-likes => not-category
+        s1 = Series([np.nan, np.nan], dtype="category")
+        s2 = Series([np.nan, np.nan])
+
+        exp = Series([np.nan, np.nan, np.nan, np.nan])
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
+
+        # all category nan-likes => category
+        s1 = Series([np.nan, np.nan], dtype="category")
+        s2 = Series([np.nan, np.nan], dtype="category")
+
+        exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category")
+
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+    def test_concat_categorical_empty(self):
+        """Assert that combining with an empty categorical Series returns
+        the equivalent of the other operand, in either argument order."""
+        # GH 13524
+
+        # empty category + non-empty category
+        s1 = Series([], dtype="category")
+        s2 = Series([1, 2], dtype="category")
+
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
+
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
+
+        # both empty, both category
+        s1 = Series([], dtype="category")
+        s2 = Series([], dtype="category")
+
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
+
+        s1 = Series([], dtype="category")
+        s2 = Series([], dtype="object")
+
+        # different dtype => not-category
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
+
+        s1 = Series([], dtype="category")
+        s2 = Series([np.nan, np.nan])
+
+        # empty Series is ignored
+        exp = Series([np.nan, np.nan])
+        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
+        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
+
+        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
+        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
+
+    def test_categorical_concat_append(self):
+        """Assert that a categorical column survives stacking a frame on
+        itself, and becomes plain strings when the categories differ."""
+        cat = Categorical(["a", "b"], categories=["a", "b"])
+        vals = [1, 2]
+        df = DataFrame({"cats": cat, "vals": vals})
+        cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
+        vals2 = [1, 2, 1, 2]
+        # without ignore_index the row labels of both inputs are repeated
+        exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))
+
+        tm.assert_frame_equal(pd.concat([df, df]), exp)
+        tm.assert_frame_equal(df._append(df), exp)
+
+        # GH 13524 can concat different categories
+        cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
+        vals3 = [1, 2]
+        df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
+
+        res = pd.concat([df, df_different_categories], ignore_index=True)
+        # expected "cats" column is built from a plain list of strings
+        exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
+        tm.assert_frame_equal(res, exp)
+
+        res = df._append(df_different_categories, ignore_index=True)
+        tm.assert_frame_equal(res, exp)
--- a/dist/client/pandas/tests/reshape/concat/test_categorical.py
+++ b/dist/client/pandas/tests/reshape/concat/test_categorical.py
@@ -0,0 +1,240 @@
+import numpy as np
+
+from pandas.core.dtypes.dtypes import CategoricalDtype
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+
+
+class TestCategoricalConcat:
+    def test_categorical_concat(self, sort):
+        """Assert that a categorical column present in only one frame keeps
+        its dtype after a row-wise concat, with missing values for the rows
+        of the other frame."""
+        # See GH 10177
+        df1 = DataFrame(
+            np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"]
+        )
+
+        df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"])
+
+        cat_values = ["one", "one", "two", "one", "two", "two", "one"]
+        df2["h"] = Series(Categorical(cat_values))
+
+        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
+        exp = DataFrame(
+            {
+                "a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
+                "b": [
+                    1,
+                    4,
+                    7,
+                    10,
+                    13,
+                    16,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                    np.nan,
+                ],
+                "c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
+                "h": [None] * 6 + cat_values,
+            }
+        )
+        # cast the expected column to the exact categorical dtype used in df2
+        exp["h"] = exp["h"].astype(df2["h"].dtype)
+        tm.assert_frame_equal(res, exp)
+
+    def test_categorical_concat_dtypes(self):
+        """Assert that comparing the dtypes of a column-wise concat against
+        "object", "int64" and "category" flags exactly the matching column."""
+
+        # GH8143
+        index = ["cat", "obj", "num"]
+        cat = Categorical(["a", "b", "c"])
+        obj = Series(["a", "b", "c"])
+        num = Series([1, 2, 3])
+        df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
+
+        result = df.dtypes == "object"
+        expected = Series([False, True, False], index=index)
+        tm.assert_series_equal(result, expected)
+
+        result = df.dtypes == "int64"
+        expected = Series([False, False, True], index=index)
+        tm.assert_series_equal(result, expected)
+
+        result = df.dtypes == "category"
+        expected = Series([True, False, False], index=index)
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_categoricalindex(self):
+        """Assert that a column-wise concat of Series indexed by
+        CategoricalIndex (shared, non-sorted categories) aligns on the union
+        of labels and keeps the categories."""
+        # GH 16111, categories that aren't lexsorted
+        categories = [9, 0, 1, 2, 3]
+
+        a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories))
+        b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories))
+        c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories))
+
+        result = pd.concat([a, b, c], axis=1)
+
+        # expected rows follow the order 9, 0, 1, 2; absent labels become NaN
+        exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
+        exp = DataFrame(
+            {
+                0: [1, 1, np.nan, np.nan],
+                1: [np.nan, 2, 2, np.nan],
+                2: [np.nan, np.nan, 3, 3],
+            },
+            columns=[0, 1, 2],
+            index=exp_idx,
+        )
+        tm.assert_frame_equal(result, exp)
+
+    def test_categorical_concat_preserve(self):
+        """Assert that category dtype is preserved when identical
+        categoricals are concatenated, and dropped when categories differ."""
+
+        # GH 8641  series concat not preserving category dtype
+        # GH 13524 can concat different categories
+        s = Series(list("abc"), dtype="category")
+        s2 = Series(list("abd"), dtype="category")
+
+        # different categories: expected is a plain Series of strings
+        exp = Series(list("abcabd"))
+        res = pd.concat([s, s2], ignore_index=True)
+        tm.assert_series_equal(res, exp)
+
+        # same categories: dtype is kept
+        exp = Series(list("abcabc"), dtype="category")
+        res = pd.concat([s, s], ignore_index=True)
+        tm.assert_series_equal(res, exp)
+
+        # same again, but keeping the original (repeated) index labels
+        exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category")
+        res = pd.concat([s, s])
+        tm.assert_series_equal(res, exp)
+
+        a = Series(np.arange(6, dtype="int64"))
+        b = Series(list("aabbca"))
+
+        # frame with a categorical column whose categories are in "c", "a", "b" order
+        df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))})
+        res = pd.concat([df2, df2])
+        exp = DataFrame(
+            {
+                "A": pd.concat([a, a]),
+                "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
+            }
+        )
+        tm.assert_frame_equal(res, exp)
+
+    def test_categorical_index_preserver(self):
+        """Assert that a categorical row index is kept when frames with the
+        same categories are concatenated, and matches an object-index concat
+        when the categories differ."""
+
+        a = Series(np.arange(6, dtype="int64"))
+        b = Series(list("aabbca"))
+
+        df2 = DataFrame(
+            {"A": a, "B": b.astype(CategoricalDtype(list("cab")))}
+        ).set_index("B")
+        result = pd.concat([df2, df2])
+        expected = DataFrame(
+            {
+                "A": pd.concat([a, a]),
+                "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
+            }
+        ).set_index("B")
+        tm.assert_frame_equal(result, expected)
+
+        # wrong categories -> uses concat_compat, which casts to object
+        df3 = DataFrame(
+            {"A": a, "B": Categorical(b, categories=list("abe"))}
+        ).set_index("B")
+        result = pd.concat([df2, df3])
+        # expected: same as concatenating both frames after casting each
+        # index to object explicitly
+        expected = pd.concat(
+            [
+                df2.set_axis(df2.index.astype(object), axis=0),
+                df3.set_axis(df3.index.astype(object), axis=0),
+            ]
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_categorical_tz(self):
+        """Assert that concatenating a tz-aware datetime Series with a
+        categorical Series gives the individual Timestamps and strings."""
+        # GH-23816
+        a = Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific"))
+        b = Series(["a", "b"], dtype="category")
+        result = pd.concat([a, b], ignore_index=True)
+        # expected is built from a mixed list of Timestamps and strings
+        expected = Series(
+            [
+                pd.Timestamp("2017-01-01", tz="US/Pacific"),
+                pd.Timestamp("2017-01-02", tz="US/Pacific"),
+                "a",
+                "b",
+            ]
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_categorical_unchanged(self):
+        """Assert that a categorical column stays categorical when a
+        column-wise concat with a partially overlapping index adds a row."""
+        # GH-12007
+        # test fix for when concat on categorical and float
+        # coerces dtype categorical -> float
+        df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A"))
+        # ser has label 3 instead of 2, so the aligned result has four rows
+        ser = Series([0, 1, 2], index=[0, 1, 3], name="B")
+        result = pd.concat([df, ser], axis=1)
+        expected = DataFrame(
+            {
+                "A": Series(["a", "b", "c", np.nan], dtype="category"),
+                "B": Series([0, 1, np.nan, 2], dtype="float"),
+            }
+        )
+        tm.assert_equal(result, expected)
+
+    def test_categorical_concat_gh7864(self):
+        # GH 7864
+        # make sure ordering is preserved
+        df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")})
+        df["grade"] = Categorical(df["raw_grade"])
+        df["grade"].cat.set_categories(["e", "a", "b"])
+
+        df1 = df[0:3]
+        df2 = df[3:]
+
+        tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories)
+        tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories)
+
+        dfx = pd.concat([df1, df2])
+        tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories)
+
+        dfa = df1._append(df2)
+        tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories)
+
+    def test_categorical_index_upcast(self):
+        """Assert that concatenating frames or Series whose categorical
+        indexes have different categories gives a plain label index."""
+        # GH 17629
+        # test upcasting to object when concatenating on categorical indexes
+        # with non-identical categories
+
+        a = DataFrame({"foo": [1, 2]}, index=Categorical(["foo", "bar"]))
+        b = DataFrame({"foo": [4, 3]}, index=Categorical(["baz", "bar"]))
+
+        res = pd.concat([a, b])
+        exp = DataFrame({"foo": [1, 2, 4, 3]}, index=["foo", "bar", "baz", "bar"])
+
+        tm.assert_equal(res, exp)
+
+        # same scenario with Series instead of DataFrame
+        a = Series([1, 2], index=Categorical(["foo", "bar"]))
+        b = Series([4, 3], index=Categorical(["baz", "bar"]))
+
+        res = pd.concat([a, b])
+        exp = Series([1, 2, 4, 3], index=["foo", "bar", "baz", "bar"])
+
+        tm.assert_equal(res, exp)
+
+    def test_categorical_missing_from_one_frame(self):
+        """Assert that a categorical column missing from the first frame
+        stays categorical after concat, with missing codes for its rows."""
+        # GH 25412
+        df1 = DataFrame({"f1": [1, 2, 3]})
+        df2 = DataFrame({"f1": [2, 3, 1], "f2": Series([4, 4, 4]).astype("category")})
+        result = pd.concat([df1, df2], sort=True)
+        dtype = CategoricalDtype([4])
+        expected = DataFrame(
+            {
+                "f1": [1, 2, 3, 2, 3, 1],
+                # code -1 marks a missing value; code 0 is the only category, 4
+                "f2": Categorical.from_codes([-1, -1, -1, 0, 0, 0], dtype=dtype),
+            },
+            index=[0, 1, 2, 0, 1, 2],
+        )
+        tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/concat/test_concat.py
+++ b/dist/client/pandas/tests/reshape/concat/test_concat.py
@@ -0,0 +1,794 @@
+from collections import (
+    abc,
+    deque,
+)
+from decimal import Decimal
+from warnings import (
+    catch_warnings,
+    simplefilter,
+)
+
+import numpy as np
+import pytest
+
+from pandas.errors import PerformanceWarning
+import pandas.util._test_decorators as td
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    PeriodIndex,
+    Series,
+    concat,
+    date_range,
+)
+import pandas._testing as tm
+from pandas.core.arrays import SparseArray
+from pandas.core.construction import create_series_with_explicit_dtype
+from pandas.tests.extension.decimal import to_decimal
+
+
+class TestConcatenate:
+    def test_append_concat(self):
+        """Assert that concatenating two period-indexed Series returns a
+        PeriodIndex starting with the first input's first label."""
+        # GH#1815
+        d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC")
+        d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC")
+
+        s1 = Series(np.random.randn(10), d1)
+        s2 = Series(np.random.randn(10), d2)
+
+        s1 = s1.to_period()
+        s2 = s2.to_period()
+
+        # the index must not be dropped: the assertions below expect it to
+        # remain a PeriodIndex
+        result = concat([s1, s2])
+        assert isinstance(result.index, PeriodIndex)
+        assert result.index[0] == s1.index[0]
+
+    def test_concat_copy(self, using_array_manager):
+        """Check, per dtype, whether the arrays of a column-wise concat own
+        their memory (copy=True) or share it with the inputs (copy=False)."""
+        df = DataFrame(np.random.randn(4, 3))
+        df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1))
+        df3 = DataFrame({5: "foo"}, index=range(4))
+
+        # These are actual copies.
+        result = concat([df, df2, df3], axis=1, copy=True)
+
+        for arr in result._mgr.arrays:
+            assert arr.base is None
+
+        # These are the same.
+        result = concat([df, df2, df3], axis=1, copy=False)
+
+        # df supplies the float data, df2 the integer data, df3 the object data
+        for arr in result._mgr.arrays:
+            if arr.dtype.kind == "f":
+                assert arr.base is df._mgr.arrays[0].base
+            elif arr.dtype.kind in ["i", "u"]:
+                assert arr.base is df2._mgr.arrays[0].base
+            elif arr.dtype == object:
+                if using_array_manager:
+                    # we get the same array object, which has no base
+                    assert arr is df3._mgr.arrays[0]
+                else:
+                    assert arr.base is not None
+
+        # Float block was consolidated.
+        df4 = DataFrame(np.random.randn(4, 1))
+        result = concat([df, df2, df3, df4], axis=1, copy=False)
+        for arr in result._mgr.arrays:
+            if arr.dtype.kind == "f":
+                if using_array_manager:
+                    # this is a view on some array in either df or df4
+                    assert any(
+                        np.shares_memory(arr, other)
+                        for other in df._mgr.arrays + df4._mgr.arrays
+                    )
+                else:
+                    # the block was consolidated, so we got a copy anyway
+                    assert arr.base is None
+            elif arr.dtype.kind in ["i", "u"]:
+                assert arr.base is df2._mgr.arrays[0].base
+            elif arr.dtype == object:
+                # this is a view on df3
+                assert any(np.shares_memory(arr, other) for other in df3._mgr.arrays)
+
+    def test_concat_with_group_keys(self):
+        """Assert that `keys` adds an outer MultiIndex level to the
+        concatenation axis, for both axis=0 and axis=1."""
+        # axis=0
+        df = DataFrame(np.random.randn(3, 4))
+        df2 = DataFrame(np.random.randn(4, 4))
+
+        result = concat([df, df2], keys=[0, 1])
+        # outer level: key of each piece; inner level: the piece's own labels
+        exp_index = MultiIndex.from_arrays(
+            [[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]]
+        )
+        expected = DataFrame(np.r_[df.values, df2.values], index=exp_index)
+        tm.assert_frame_equal(result, expected)
+
+        result = concat([df, df], keys=[0, 1])
+        exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
+        expected = DataFrame(np.r_[df.values, df.values], index=exp_index2)
+        tm.assert_frame_equal(result, expected)
+
+        # axis=1
+        # the frames are now 4x3 and 4x4, so the same two MultiIndexes are
+        # reused as the expected columns
+        df = DataFrame(np.random.randn(4, 3))
+        df2 = DataFrame(np.random.randn(4, 4))
+
+        result = concat([df, df2], keys=[0, 1], axis=1)
+        expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index)
+        tm.assert_frame_equal(result, expected)
+
+        result = concat([df, df], keys=[0, 1], axis=1)
+        expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_keys_specific_levels(self):
+        """Assert that an explicit `levels` list is used as given for the
+        key level, including an entry ("zero") that no key refers to."""
+        df = DataFrame(np.random.randn(10, 4))
+        pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]]
+        level = ["three", "two", "one", "zero"]
+        result = concat(
+            pieces,
+            axis=1,
+            keys=["one", "two", "three"],
+            levels=[level],
+            names=["group_key"],
+        )
+
+        tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key"))
+        tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3]))
+
+        # only the key level was named; the inner level stays unnamed
+        assert result.columns.names == ["group_key", None]
+
+    @pytest.mark.parametrize("mapping", ["mapping", "dict"])
+    def test_concat_mapping(self, mapping, non_dict_mapping_subclass):
+        """Assert that passing a mapping to concat is equivalent to passing
+        its values as a list with its keys as `keys`."""
+        constructor = dict if mapping == "dict" else non_dict_mapping_subclass
+        frames = constructor(
+            {
+                "foo": DataFrame(np.random.randn(4, 3)),
+                "bar": DataFrame(np.random.randn(4, 3)),
+                "baz": DataFrame(np.random.randn(4, 3)),
+                "qux": DataFrame(np.random.randn(4, 3)),
+            }
+        )
+
+        # NOTE: despite the name, nothing is sorted here - this is simply the
+        # order in which the mapping yields its keys
+        sorted_keys = list(frames.keys())
+
+        result = concat(frames)
+        expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys)
+        tm.assert_frame_equal(result, expected)
+
+        result = concat(frames, axis=1)
+        expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys, axis=1)
+        tm.assert_frame_equal(result, expected)
+
+        # explicit keys select a subset of the mapping, in the given order
+        keys = ["baz", "foo", "bar"]
+        result = concat(frames, keys=keys)
+        expected = concat([frames[k] for k in keys], keys=keys)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_keys_and_levels(self):
+        """Assert the resulting MultiIndex for tuple keys combined with
+        explicit `levels` and/or `names`."""
+        df = DataFrame(np.random.randn(1, 3))
+        df2 = DataFrame(np.random.randn(1, 4))
+
+        levels = [["foo", "baz"], ["one", "two"]]
+        names = ["first", "second"]
+        result = concat(
+            [df, df2, df, df2],
+            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
+            levels=levels,
+            names=names,
+        )
+        expected = concat([df, df2, df, df2])
+        # two key levels plus the original row label (always 0) as a third,
+        # unnamed level
+        exp_index = MultiIndex(
+            levels=levels + [[0]],
+            codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 0, 0, 0]],
+            names=names + [None],
+        )
+        expected.index = exp_index
+
+        tm.assert_frame_equal(result, expected)
+
+        # no names
+        result = concat(
+            [df, df2, df, df2],
+            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
+            levels=levels,
+        )
+        assert result.index.names == (None,) * 3
+
+        # no levels
+        result = concat(
+            [df, df2, df, df2],
+            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
+            names=["first", "second"],
+        )
+        assert result.index.names == ("first", "second", None)
+        # without explicit levels the first level is expected in sorted order
+        tm.assert_index_equal(
+            result.index.levels[0], Index(["baz", "foo"], name="first")
+        )
+
+    def test_concat_keys_levels_no_overlap(self):
+        """Assert that concat raises ValueError when none of the keys occur
+        in the explicitly passed levels."""
+        # GH #1406
+        df = DataFrame(np.random.randn(1, 3), index=["a"])
+        df2 = DataFrame(np.random.randn(1, 4), index=["b"])
+
+        # same frame twice
+        msg = "Values not found in passed level"
+        with pytest.raises(ValueError, match=msg):
+            concat([df, df], keys=["one", "two"], levels=[["foo", "bar", "baz"]])
+
+        # two different frames: a different message is expected
+        msg = "Key one not in level"
+        with pytest.raises(ValueError, match=msg):
+            concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]])
+
+    def test_crossed_dtypes_weird_corner(self):
+        # two frames whose columns alternate f8/i8 in opposite order
+        columns = ["A", "B", "C", "D"]
+        data = [1, 2, 3, 4]
+
+        def build(dtypes):
+            # one column per label, each backed by an array of the given dtype
+            cols = {c: np.array(data, dtype=d) for c, d in zip(columns, dtypes)}
+            return DataFrame(cols, columns=columns)
+
+        df1 = build(["f8", "i8", "f8", "i8"])
+        df2 = build(["i8", "f8", "i8", "f8"])
+
+        stacked = np.concatenate([df1.values, df2.values], axis=0)
+        expected = DataFrame(stacked, columns=columns)
+        appended = concat([df1, df2], ignore_index=True)
+        tm.assert_frame_equal(appended, expected)
+
+        # keys together with names on frames of different widths
+        one = DataFrame(np.random.randn(1, 3), index=["a"])
+        two = DataFrame(np.random.randn(1, 4), index=["b"])
+        keyed = concat([one, two], keys=["one", "two"], names=["first", "second"])
+        assert keyed.index.names == ("first", "second")
+
+    def test_with_mixed_tuples(self, sort):
+        # 10697
+        # column labels mix plain strings with tuples
+        rows = range(2)
+        left = DataFrame({"A": "foo", ("B", 1): "bar"}, index=rows)
+        right = DataFrame({"B": "foo", ("B", 1): "bar"}, index=rows)
+
+        # smoke test: nothing is asserted beyond concat not raising
+        concat([left, right], sort=sort)
+
+    def test_concat_mixed_objs(self):
+        # concat mixed series/frames
+        # G2385
+        index = date_range("01-Jan-2013", periods=10, freq="H")
+        arr = np.arange(10, dtype="int64")
+        s1 = Series(arr, index=index)
+        s2 = Series(arr, index=index)
+        df = DataFrame(arr.reshape(-1, 1), index=index)
+
+        def side_by_side(columns):
+            # ``arr`` repeated once per requested column label
+            width = len(columns)
+            block = np.repeat(arr, width).reshape(-1, width)
+            return DataFrame(block, index=index, columns=columns)
+
+        # axis 1
+        tm.assert_frame_equal(concat([df, df], axis=1), side_by_side([0, 0]))
+        tm.assert_frame_equal(concat([s1, s2], axis=1), side_by_side([0, 1]))
+        tm.assert_frame_equal(concat([s1, s2, s1], axis=1), side_by_side([0, 1, 2]))
+        tm.assert_frame_equal(
+            concat([s1, df, s2, s2, s1], axis=1), side_by_side([0, 0, 1, 2, 3])
+        )
+
+        # with names
+        s1.name = "foo"
+        tm.assert_frame_equal(
+            concat([s1, df, s2], axis=1), side_by_side(["foo", 0, 0])
+        )
+
+        s2.name = "bar"
+        tm.assert_frame_equal(
+            concat([s1, df, s2], axis=1), side_by_side(["foo", 0, "bar"])
+        )
+
+        # ignore index
+        tm.assert_frame_equal(
+            concat([s1, df, s2], axis=1, ignore_index=True), side_by_side([0, 1, 2])
+        )
+
+        # axis 0
+        expected = DataFrame(
+            np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0]
+        )
+        tm.assert_frame_equal(concat([s1, df, s2]), expected)
+
+        expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
+        tm.assert_frame_equal(concat([s1, df, s2], ignore_index=True), expected)
+
+    def test_dtype_coerceion(self):
+        import datetime
+
+        def assert_dtypes_survive(frame):
+            # re-assemble the first two rows and compare the resulting dtypes
+            rebuilt = concat([frame.iloc[[0]], frame.iloc[[1]]])
+            tm.assert_series_equal(rebuilt.dtypes, frame.dtypes)
+
+        # 12411
+        utc_stamp = pd.Timestamp("20130101").tz_localize("UTC")
+        assert_dtypes_survive(DataFrame({"date": [utc_stamp, pd.NaT]}))
+
+        # 12045
+        dates = [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]
+        assert_dtypes_survive(DataFrame({"date": dates}))
+
+        # 11594
+        assert_dtypes_survive(DataFrame({"text": ["some words"] + [None] * 9}))
+
+    def test_concat_single_with_key(self):
+        frame = DataFrame(np.random.randn(10, 4))
+
+        single = concat([frame], keys=["foo"])
+        doubled = concat([frame, frame], keys=["foo", "bar"])
+        # compare against the first ten rows of the two-key result
+        tm.assert_frame_equal(single, doubled[:10])
+
+    def test_concat_no_items_raises(self):
+        # an empty list of objects is rejected
+        msg = "No objects to concatenate"
+        with pytest.raises(ValueError, match=msg):
+            concat([])
+
+    def test_concat_exclude_none(self):
+        frame = DataFrame(np.random.randn(10, 4))
+
+        # None entries between the two halves do not affect the result
+        head, tail = frame[:5], frame[5:]
+        tm.assert_frame_equal(concat([head, None, None, tail]), frame)
+
+        # a list holding nothing but None raises
+        msg = "All objects passed were None"
+        with pytest.raises(ValueError, match=msg):
+            concat([None, None])
+
+    def test_concat_keys_with_none(self):
+        # #1649
+        df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]])
+        kept = {"b": df0, "c": df0[:2], "d": df0[:1], "e": df0}
+
+        # mapping input with a None value under key "a"
+        result = concat({"a": None, **kept})
+        tm.assert_frame_equal(result, concat(kept))
+
+        # list input with explicit keys and a leading None
+        result = concat([None, *kept.values()], keys=["a", *kept])
+        expected = concat(list(kept.values()), keys=list(kept))
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_bug_1719(self):
+        full = tm.makeTimeSeries()
+        every_other = tm.makeTimeSeries()[::2]
+
+        # to join with union
+        # these two are of different length!
+        forward = concat([full, every_other], join="outer", axis=1)
+        backward = concat([every_other, full], join="outer", axis=1)
+
+        # input order must not change the number of rows
+        assert len(forward) == len(backward)
+
+    def test_concat_bug_2972(self):
+        # two Series carrying the same name, concatenated column-wise
+        shared = "same name"
+        zeros = Series(np.zeros(5))
+        ones = Series(np.ones(5))
+        zeros.name = shared
+        ones.name = shared
+
+        expected = DataFrame({0: zeros, 1: ones})
+        expected.columns = [shared, shared]
+
+        result = concat([zeros, ones], axis=1)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_bug_3602(self):
+        # GH 3602, duplicate columns
+        c_values = [9, 10, 11, 12]
+        misc_values = [1, 2, 3, 4]
+
+        df1 = DataFrame(
+            {
+                "firmNo": [0, 0, 0, 0],
+                "prc": [6, 6, 6, 6],
+                "stringvar": ["rrr", "rrr", "rrr", "rrr"],
+            }
+        )
+        df2 = DataFrame({"C": c_values, "misc": misc_values, "prc": [6, 6, 6, 6]})
+        result = concat([df1, df2], axis=1)
+
+        # "prc" appears once from each input frame
+        rows = [[0, 6, "rrr", c, m, 6] for c, m in zip(c_values, misc_values)]
+        expected = DataFrame(rows)
+        expected.columns = ["firmNo", "prc", "stringvar", "C", "misc", "prc"]
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_iterables(self):
+        # GH8645 check concat works with tuples, list, generators, and weird
+        # stuff like deque and custom iterables
+        df1 = DataFrame([1, 2, 3])
+        df2 = DataFrame([4, 5, 6])
+        expected = DataFrame([1, 2, 3, 4, 5, 6])
+
+        class CustomIterator1:
+            # exposes only __len__ and __getitem__
+            def __len__(self) -> int:
+                return 2
+
+            def __getitem__(self, index):
+                try:
+                    return {0: df1, 1: df2}[index]
+                except KeyError as err:
+                    raise IndexError from err
+
+        class CustomIterator2(abc.Iterable):
+            # exposes only __iter__
+            def __iter__(self):
+                yield df1
+                yield df2
+
+        containers = [
+            (df1, df2),
+            [df1, df2],
+            (df for df in (df1, df2)),
+            deque((df1, df2)),
+            CustomIterator1(),
+            CustomIterator2(),
+        ]
+        for objs in containers:
+            tm.assert_frame_equal(concat(objs, ignore_index=True), expected)
+
+    def test_concat_order(self):
+        # GH 17344
+        rows = range(3)
+        first = DataFrame(index=rows, columns=["a", 1, None])
+        others = [DataFrame(index=rows, columns=[None, 1, "a"]) for _ in range(100)]
+        dfs = [first, *others]
+
+        # the resulting columns must equal those of the first frame
+        result = concat(dfs, sort=True).columns
+        tm.assert_index_equal(result, first.columns)
+
+    def test_concat_different_extension_dtypes_upcasts(self):
+        # Int64 data combined with to_decimal() data is expected as object
+        ints = Series(pd.array([1, 2], dtype="Int64"))
+        decimals = Series(to_decimal([1, 2]))
+
+        expected = Series([1, 2, Decimal(1), Decimal(2)], dtype=object)
+        result = concat([ints, decimals], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_ordered_dict(self):
+        # GH 21510
+        short, longer = Series(range(3)), Series(range(4))
+
+        # a mapping must give the same result as a list plus explicit keys
+        from_keys = concat([short, longer], keys=["First", "Another"])
+        from_mapping = concat({"First": short, "Another": longer})
+        tm.assert_series_equal(from_mapping, from_keys)
+
+
+@pytest.mark.parametrize("pdt", [Series, DataFrame])
+@pytest.mark.parametrize("dt", np.sctypes["float"])
+def test_concat_no_unnecessary_upcast(dt, pdt):
+    # GH 13247
+    ndim = pdt(dtype=object).ndim
+
+    def make(value):
+        # single-element container of dtype ``dt`` with matching dimensions
+        return pdt(np.array([value], dtype=dt, ndmin=ndim))
+
+    combined = concat([make(1), make(np.nan), make(5)])
+    assert combined.values.dtype == dt
+
+
+@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame])
+@pytest.mark.parametrize("dt", np.sctypes["int"])
+def test_concat_will_upcast(dt, pdt):
+    with catch_warnings(record=True):
+        ndim = pdt().ndim
+        first = pdt(np.array([1], dtype=dt, ndmin=ndim))
+        # the middle piece has no explicit dtype and holds NaN
+        middle = pdt(np.array([np.nan], ndmin=ndim))
+        last = pdt(np.array([5], dtype=dt, ndmin=ndim))
+
+        combined = concat([first, middle, last])
+        assert combined.values.dtype == "float64"
+
+
+def test_concat_empty_and_non_empty_frame_regression():
+    # GH 18178 regression test
+    populated = DataFrame({"foo": [1]})
+    empty = DataFrame({"foo": []})
+
+    result = concat([populated, empty])
+    tm.assert_frame_equal(result, DataFrame({"foo": [1.0]}))
+
+
+def test_concat_sparse():
+    # GH 23557
+    sparse_ser = Series(SparseArray([0, 1, 2]))
+    result = concat([sparse_ser, sparse_ser], axis=1)
+
+    dense = DataFrame(data=[[0, 0], [1, 1], [2, 2]])
+    expected = dense.astype(pd.SparseDtype(np.int64, 0))
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_dense_sparse():
+    # GH 30668
+    sparse_part = Series(pd.arrays.SparseArray([1, None]), dtype=float)
+    dense_part = Series([1], dtype=float)
+    result = concat([sparse_part, dense_part], axis=0)
+
+    target_dtype = pd.SparseDtype(np.float64, None)
+    expected = Series(data=[1, None, 1], index=[0, 1, 0]).astype(target_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]])
+def test_duplicate_keys(keys):
+    # GH 33654
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    s1 = Series([7, 8, 9], name="c")
+    s2 = Series([10, 11, 12], name="d")
+    result = concat([df, s1, s2], axis=1, keys=keys)
+
+    # first key labels both frame columns; the others label one Series each
+    frame_key, s1_key, s2_key = keys
+    expected_columns = MultiIndex.from_tuples(
+        [(frame_key, "a"), (frame_key, "b"), (s1_key, "c"), (s2_key, "d")]
+    )
+    expected = DataFrame(
+        [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]], columns=expected_columns
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_duplicate_keys_same_frame():
+    # GH 43595
+    key = "e"
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    result = concat([df, df], axis=1, keys=[key, key])
+
+    expected_columns = MultiIndex.from_tuples([(key, "a"), (key, "b")] * 2)
+    expected = DataFrame(
+        [[1, 4, 1, 4], [2, 5, 2, 5], [3, 6, 3, 6]], columns=expected_columns
+    )
+    with catch_warnings():
+        # result.columns not sorted, resulting in performance warning
+        simplefilter("ignore", PerformanceWarning)
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "obj",
+    [
+        tm.SubclassedDataFrame({"A": np.arange(0, 10)}),
+        tm.SubclassedSeries(np.arange(0, 10), name="A"),
+    ],
+)
+def test_concat_preserves_subclass(obj):
+    # GH28330 -- preserve subclass
+    subclass = type(obj)
+    doubled = concat([obj, obj])
+    assert isinstance(doubled, subclass)
+
+
+def test_concat_frame_axis0_extension_dtypes():
+    # preserve extension dtype (through common_dtype mechanism)
+    masked = DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})
+    plain = DataFrame({"a": np.array([4, 5, 6])})
+
+    # both input orders are expected to give Int64
+    cases = [
+        ([masked, plain], [1, 2, 3, 4, 5, 6]),
+        ([plain, masked], [4, 5, 6, 1, 2, 3]),
+    ]
+    for frames, values in cases:
+        result = concat(frames, ignore_index=True)
+        expected = DataFrame({"a": values}, dtype="Int64")
+        tm.assert_frame_equal(result, expected)
+
+
+def test_concat_preserves_extension_int64_dtype():
+    # GH 24768
+    only_a = DataFrame({"a": [-1]}, dtype="Int64")
+    only_b = DataFrame({"b": [1]}, dtype="Int64")
+
+    # non-overlapping columns: missing cells are expected as Int64 nulls
+    expected = DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64")
+    result = concat([only_a, only_b], ignore_index=True)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "dtype1,dtype2,expected_dtype",
+    [
+        ("bool", "bool", "bool"),
+        ("boolean", "bool", "boolean"),
+        ("bool", "boolean", "boolean"),
+        ("boolean", "boolean", "boolean"),
+    ],
+)
+def test_concat_bool_types(dtype1, dtype2, expected_dtype):
+    # GH 42800
+    first_values = [True, False]
+    second_values = [False, True]
+
+    result = concat(
+        [Series(first_values, dtype=dtype1), Series(second_values, dtype=dtype2)],
+        ignore_index=True,
+    )
+    expected = Series(first_values + second_values, dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    ("keys", "integrity"),
+    [
+        (["red"] * 3, True),
+        (["red"] * 3, False),
+        (["red", "blue", "red"], False),
+        (["red", "blue", "red"], True),
+    ],
+)
+def test_concat_repeated_keys(keys, integrity):
+    # GH: 20816
+    labels = ["a", "b", "c"]
+    series_list = [Series({"a": 1}), Series({"b": 2}), Series({"c": 3})]
+    result = concat(series_list, keys=keys, verify_integrity=integrity)
+
+    # each key is paired with the single label of its Series
+    expected_index = MultiIndex.from_tuples(list(zip(keys, labels)))
+    expected = Series([1, 2, 3], index=expected_index)
+    tm.assert_series_equal(result, expected)
+
+
+def test_concat_null_object_with_dti():
+    # GH#40841
+    dti = pd.DatetimeIndex(
+        ["2021-04-08 21:21:14+00:00"], dtype="datetime64[ns, UTC]", name="Time (UTC)"
+    )
+    idx = Index([None], dtype="object", name="Maybe Time (UTC)")
+
+    left = DataFrame(data={"A": [None], "B": [np.nan]}, index=idx)
+    right = DataFrame(data={"C": [0.5274]}, index=dti)
+    result = concat([left, right], axis="columns")
+
+    # expected index is object dtype holding None and the single timestamp
+    expected = DataFrame(
+        {"A": [None, None], "B": [np.nan, np.nan], "C": [np.nan, 0.5274]},
+        index=Index([None, dti[0]], dtype=object),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_multiindex_with_empty_rangeindex():
+    # GH#41234
+    mi = MultiIndex.from_tuples([("B", 1), ("C", 1)])
+    with_columns = DataFrame([[1, 2]], columns=mi)
+    without_columns = DataFrame(index=[1], columns=pd.RangeIndex(0))
+
+    result = concat([with_columns, without_columns])
+    # the column-less frame is expected to add one all-NaN row
+    expected = DataFrame([[1, 2], [np.nan, np.nan]], columns=mi)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_posargs_deprecation():
+    # https://github.com/pandas-dev/pandas/issues/41485
+    first = DataFrame([[1, 2, 3]], index=["a"])
+    second = DataFrame([[4, 5, 6]], index=["b"])
+
+    msg = (
+        "In a future version of pandas all arguments of concat "
+        "except for the argument 'objs' will be keyword-only"
+    )
+    # the second argument is passed positionally on purpose
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = concat([first, second], 0)
+
+    expected = DataFrame([[1, 2, 3], [4, 5, 6]], index=["a", "b"])
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        Series(data=[1, 2]),
+        DataFrame(
+            data={
+                "col1": [1, 2],
+            }
+        ),
+        DataFrame(dtype=float),
+        Series(dtype=float),
+    ],
+)
+def test_concat_drop_attrs(data):
+    # GH#41828
+    pieces = []
+    for marker in (1, 2):
+        piece = data.copy()
+        piece.attrs = {1: marker}
+        pieces.append(piece)
+
+    # the inputs carry different attrs, so the result must carry none
+    combined = concat(pieces)
+    assert len(combined.attrs) == 0
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        Series(data=[1, 2]),
+        DataFrame(
+            data={
+                "col1": [1, 2],
+            }
+        ),
+        DataFrame(dtype=float),
+        Series(dtype=float),
+    ],
+)
+def test_concat_retain_attrs(data):
+    # GH#41828
+    left = data.copy()
+    right = data.copy()
+    left.attrs = {1: 1}
+    right.attrs = {1: 1}
+
+    # both inputs carry equal attrs, so the result must keep them
+    combined = concat([left, right])
+    assert combined.attrs[1] == 1
+
+
+@td.skip_array_manager_invalid_test
+@pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"])
+@pytest.mark.parametrize("empty_dtype", [None, "float64", "object"])
+def test_concat_ignore_emtpy_object_float(empty_dtype, df_dtype):
+    # https://github.com/pandas-dev/pandas/issues/45637
+    df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype)
+    empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype)
+    result = concat([empty, df])
+
+    if df_dtype != "int64":
+        expected = df
+    else:
+        # TODO what exact behaviour do we want for integer eventually?
+        target = "float64" if empty_dtype == "float64" else "object"
+        expected = df.astype(target)
+    tm.assert_frame_equal(result, expected)
+
+
+@td.skip_array_manager_invalid_test
+@pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"])
+@pytest.mark.parametrize("empty_dtype", [None, "float64", "object"])
+def test_concat_ignore_all_na_object_float(empty_dtype, df_dtype):
+    df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype)
+    empty = DataFrame({"foo": [np.nan], "bar": [np.nan]}, dtype=empty_dtype)
+    result = concat([empty, df], ignore_index=True)
+
+    expected_dtype = df_dtype
+    if df_dtype == "int64":
+        # TODO what exact behaviour do we want for integer eventually?
+        expected_dtype = "object" if empty_dtype == "object" else "float64"
+
+    expected = DataFrame(
+        {"foo": [None, 1, 2], "bar": [None, 1, 2]}, dtype=expected_dtype
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@td.skip_array_manager_invalid_test
+def test_concat_ignore_empty_from_reindex():
+    # https://github.com/pandas-dev/pandas/pull/43507#issuecomment-920375856
+    df1 = DataFrame({"a": [1], "b": [pd.Timestamp("2012-01-01")]})
+    df2 = DataFrame({"a": [2]})
+
+    # reindexing df2 to df1's columns adds a "b" column that df2 never had
+    result = concat([df1, df2.reindex(columns=df1.columns)], ignore_index=True)
+
+    # only ``expected`` is bound here; the original also rebound ``df1``
+    expected = DataFrame({"a": [1, 2], "b": [pd.Timestamp("2012-01-01"), pd.NaT]})
+    tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/concat/test_dataframe.py
+++ b/dist/client/pandas/tests/reshape/concat/test_dataframe.py
@@ -0,0 +1,230 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    Series,
+    concat,
+)
+import pandas._testing as tm
+
+
+class TestDataFrameConcat:
+    def test_concat_multiple_frames_dtypes(self):
+        # GH#2759
+        f64 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)
+        f32 = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
+
+        # each column is expected to keep the dtype of its source frame
+        expected = Series(
+            [np.dtype(name) for name in ("float64", "float64", "float32", "float32")],
+            index=["foo", "bar", 0, 1],
+        )
+        results = concat((f64, f32), axis=1).dtypes
+        tm.assert_series_equal(results, expected)
+
+    def test_concat_tuple_keys(self):
+        # GH#14438
+        ones = DataFrame(np.ones((2, 2)), columns=list("AB"))
+        twos = DataFrame(np.ones((3, 2)) * 2, columns=list("AB"))
+        results = concat((ones, twos), keys=[("bee", "bah"), ("bee", "boo")])
+
+        # both columns hold the same values under three-part row labels
+        by_row = {
+            ("bee", "bah", 0): 1.0,
+            ("bee", "bah", 1): 1.0,
+            ("bee", "boo", 0): 2.0,
+            ("bee", "boo", 1): 2.0,
+            ("bee", "boo", 2): 2.0,
+        }
+        expected = DataFrame({"A": by_row, "B": dict(by_row)})
+        tm.assert_frame_equal(results, expected)
+
+    def test_concat_named_keys(self):
+        # GH#14252
+        df = DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]})
+
+        def expected_frame(outer_name):
+            # doubled frame whose outer index level carries ``outer_name``
+            mi = pd.MultiIndex.from_product(
+                (["a", "b"], [0, 1]), names=[outer_name, None]
+            )
+            return DataFrame(
+                {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, index=mi
+            )
+
+        expected_named = expected_frame("baz")
+
+        # name taken from the keys Index itself
+        named_keys = Index(["a", "b"], name="baz")
+        tm.assert_frame_equal(concat([df, df], keys=named_keys), expected_named)
+
+        # name supplied through ``names``
+        plain_keys = Index(["a", "b"], name=None)
+        from_names = concat([df, df], keys=plain_keys, names=["baz"])
+        tm.assert_frame_equal(from_names, expected_named)
+
+        # no name given anywhere
+        tm.assert_frame_equal(concat([df, df], keys=plain_keys), expected_frame(None))
+
+    def test_concat_axis_parameter(self):
+        # GH#14369
+        row_axes = ("index", "rows", 0)
+        column_axes = ("columns", 1)
+
+        df1 = DataFrame({"A": [0.1, 0.2]}, index=range(2))
+        df2 = DataFrame({"A": [0.3, 0.4]}, index=range(2))
+
+        # Index/row/0 DataFrame
+        expected_index = DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1])
+        for axis in row_axes:
+            tm.assert_frame_equal(concat([df1, df2], axis=axis), expected_index)
+
+        # Columns/1 DataFrame
+        expected_columns = DataFrame(
+            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"]
+        )
+        for axis in column_axes:
+            tm.assert_frame_equal(concat([df1, df2], axis=axis), expected_columns)
+
+        series1 = Series([0.1, 0.2])
+        series2 = Series([0.3, 0.4])
+
+        # Index/row/0 Series
+        expected_index_series = Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1])
+        for axis in row_axes:
+            result = concat([series1, series2], axis=axis)
+            tm.assert_series_equal(result, expected_index_series)
+
+        # Columns/1 Series
+        expected_columns_series = DataFrame(
+            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1]
+        )
+        for axis in column_axes:
+            result = concat([series1, series2], axis=axis)
+            tm.assert_frame_equal(result, expected_columns_series)
+
+        # Testing ValueError
+        with pytest.raises(ValueError, match="No axis named"):
+            concat([series1, series2], axis="something")
+
+    def test_concat_numerical_names(self):
+        # GH#15262, GH#12223
+        full_index = pd.MultiIndex.from_product(
+            [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=[1, 2]
+        )
+        df = DataFrame({"col": range(9)}, dtype="int32", index=full_index)
+
+        # first two rows followed by the last two rows
+        result = concat((df.iloc[:2, :], df.iloc[-2:, :]))
+
+        kept = [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")]
+        expected = DataFrame(
+            {"col": [0, 1, 7, 8]},
+            dtype="int32",
+            index=pd.MultiIndex.from_tuples(kept, names=[1, 2]),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_astype_dup_col(self):
+        # GH#23049
+        single = DataFrame([{"a": "b"}])
+        dup = concat([single, single], axis=1)
+
+        values = np.array(["b", "b"]).reshape(1, 2)
+        expected = DataFrame(values, columns=["a", "a"]).astype("category")
+        tm.assert_frame_equal(dup.astype("category"), expected)
+
+    def test_concat_dataframe_keys_bug(self, sort):
+        long_ids = Index(["a", "b", "c"], name="id")
+        short_ids = Index(["a", "b"], name="id")
+        t1 = DataFrame({"value": Series([1, 2, 3], index=long_ids)})
+        t2 = DataFrame({"value": Series([7, 8], index=short_ids)})
+
+        # it works
+        result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort)
+        assert list(result.columns) == [("t1", "value"), ("t2", "value")]
+
+    def test_concat_bool_with_int(self):
+        # GH#42092 we may want to change this to return object, but that
+        #  would need a deprecation
+        bools = DataFrame(Series([True, False, True, True], dtype="bool"))
+        ints = DataFrame(Series([1, 0, 1], dtype="int64"))
+
+        # expected matches casting the bool frame to int64 before concat
+        result = concat([bools, ints])
+        expected = concat([bools.astype("int64"), ints])
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_duplicates_in_index_with_keys(self):
+        # GH#42651
+        index = [1, 1, 3]
+        data = [1, 2, 3]
+
+        df = DataFrame(data=data, index=index)
+        result = concat([df], keys=["A"], names=["ID", "date"])
+
+        mi = pd.MultiIndex.from_product([["A"], index], names=["ID", "date"])
+        tm.assert_frame_equal(result, DataFrame(data=data, index=mi))
+
+        # the inner level is expected to list each label once
+        inner_level = result.index.levels[1]
+        tm.assert_index_equal(inner_level, Index([1, 3], name="date"))
+
+    @pytest.mark.parametrize("ignore_index", [True, False])
+    @pytest.mark.parametrize("order", ["C", "F"])
+    @pytest.mark.parametrize("axis", [0, 1])
+    def test_concat_copies(self, axis, order, ignore_index):
+        # based on asv ConcatDataFrames
+        source = DataFrame(np.zeros((10000, 200), dtype=np.float32, order=order))
+
+        res = concat([source] * 5, axis=axis, ignore_index=ignore_index, copy=True)
+
+        # with copy=True no result column may share memory with a source column
+        for out_arr in res._iter_column_arrays():
+            for src_arr in source._iter_column_arrays():
+                assert not np.shares_memory(out_arr, src_arr)
+
+    def test_outer_sort_columns(self):
+        # GH#47127
+        left = DataFrame({"A": [0], "B": [1], 0: 1})
+        right = DataFrame({"A": [100]})
+
+        expected = DataFrame({0: [1.0, np.nan], "A": [0, 100], "B": [1.0, np.nan]})
+        result = concat([left, right], ignore_index=True, join="outer", sort=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_inner_sort_columns(self):
+        # GH#47127
+        left = DataFrame({"A": [0], "B": [1], 0: 1})
+        right = DataFrame({"A": [100], 0: 2})
+
+        expected = DataFrame({0: [1, 2], "A": [0, 100]})
+        result = concat([left, right], ignore_index=True, join="inner", sort=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_sort_columns_one_df(self):
+        # GH#47127
+        only = DataFrame({"A": [100], 0: 2})
+
+        expected = DataFrame({0: [2], "A": [100]})
+        result = concat([only], ignore_index=True, join="inner", sort=True)
+        tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/concat/test_datetimes.py
+++ b/dist/client/pandas/tests/reshape/concat/test_datetimes.py
@@ -0,0 +1,543 @@
+import datetime as dt
+from datetime import datetime
+
+import dateutil
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    concat,
+    date_range,
+    to_timedelta,
+)
+import pandas._testing as tm
+
+
+class TestDatetimeConcat:
+    def test_concat_datetime64_block(self):
+        """Concatenating a datetime64 frame with itself keeps both halves intact."""
+        # ``date_range`` is already provided by the module-level
+        # ``from pandas import ...``; no function-local import is needed.
+        rng = date_range("1/1/2000", periods=10)
+
+        df = DataFrame({"time": rng})
+
+        result = concat([df, df])
+        # Each half of the result is compared against the original range.
+        assert (result.iloc[:10]["time"] == rng).all()
+        assert (result.iloc[10:]["time"] == rng).all()
+
+    def test_concat_datetime_datetime64_frame(self):
+        """Smoke test (GH#2624): concat a datetime64 frame with a from_records frame."""
+        # Python datetimes next to a mixed int/str "test" column.
+        records = [
+            [datetime(2010, 1, 1), 1],
+            [datetime(2010, 1, 2), "hi"],
+        ]
+        df2_obj = DataFrame.from_records(records, columns=["date", "test"])
+
+        daily = date_range(start="2000/1/1", freq="D", periods=10)
+        df1 = DataFrame({"date": daily, "test": range(10)})
+
+        # No assertion on the result: completing without raising is the test.
+        concat([df1, df2_obj])
+
+    def test_concat_datetime_timezone(self):
+        """Concat frames whose DatetimeIndexes carry time zones (three scenarios)."""
+        # GH 18523
+        # Scenario 1: idx2 is built from idx1's own endpoints.
+        idx1 = date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris")
+        idx2 = date_range(start=idx1[0], end=idx1[-1], freq="H")
+        df1 = DataFrame({"a": [1, 2, 3]}, index=idx1)
+        df2 = DataFrame({"b": [1, 2, 3]}, index=idx2)
+        result = concat([df1, df2], axis=1)
+
+        exp_idx = (
+            DatetimeIndex(
+                [
+                    "2011-01-01 00:00:00+01:00",
+                    "2011-01-01 01:00:00+01:00",
+                    "2011-01-01 02:00:00+01:00",
+                ],
+                freq="H",
+            )
+            .tz_convert("UTC")
+            .tz_convert("Europe/Paris")
+        )
+
+        expected = DataFrame(
+            [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"]
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+        # Scenario 2: df3 is indexed in Asia/Tokyo while df1 is in Europe/Paris;
+        # the expected index below is written with +00:00 offsets.
+        idx3 = date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo")
+        df3 = DataFrame({"b": [1, 2, 3]}, index=idx3)
+        result = concat([df1, df3], axis=1)
+
+        exp_idx = DatetimeIndex(
+            [
+                "2010-12-31 15:00:00+00:00",
+                "2010-12-31 16:00:00+00:00",
+                "2010-12-31 17:00:00+00:00",
+                "2010-12-31 23:00:00+00:00",
+                "2011-01-01 00:00:00+00:00",
+                "2011-01-01 01:00:00+00:00",
+            ]
+        )
+
+        # No timestamps are shared, so each row has a value in only one column.
+        expected = DataFrame(
+            [
+                [np.nan, 1],
+                [np.nan, 2],
+                [np.nan, 3],
+                [1, np.nan],
+                [2, np.nan],
+                [3, np.nan],
+            ],
+            index=exp_idx,
+            columns=["a", "b"],
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+        # GH 13783: Concat after resample
+        # Scenario 3: row-wise concat; the expected index is idx1 repeated twice.
+        result = concat([df1.resample("H").mean(), df2.resample("H").mean()], sort=True)
+        expected = DataFrame(
+            {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]},
+            index=idx1.append(idx1),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_datetimeindex_freq(self):
+        """Concat slices of a frame with a freq-carrying DatetimeIndex, in and out of order."""
+        # GH 3232
+        # Monotonic index result
+        dr = date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC")
+        data = list(range(100))
+        expected = DataFrame(data, index=dr)
+        result = concat([expected[:50], expected[50:]])
+        tm.assert_frame_equal(result, expected)
+
+        # Non-monotonic index result
+        # NOTE: ``expected`` on the right-hand side is still the monotonic frame
+        # from above; it is rebound only after ``result`` has been computed.
+        result = concat([expected[50:], expected[:50]])
+        expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50]))
+        # Clear freq on the expected index through its private ``_data`` attribute.
+        expected.index._data.freq = None
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_multiindex_datetime_object_index(self):
+        """Column-wise concat of Series indexed by a MultiIndex holding date objects."""
+        # https://github.com/pandas-dev/pandas/issues/11058
+        idx = Index(
+            [dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)],
+            dtype="object",
+        )
+
+        # ``s`` uses the first two dates (idx[:-1]).
+        s = Series(
+            ["a", "b"],
+            index=MultiIndex.from_arrays(
+                [
+                    [1, 2],
+                    idx[:-1],
+                ],
+                names=["first", "second"],
+            ),
+        )
+        # ``s2`` uses the first and last dates (idx[::2]).
+        s2 = Series(
+            ["a", "b"],
+            index=MultiIndex.from_arrays(
+                [[1, 2], idx[::2]],
+                names=["first", "second"],
+            ),
+        )
+        mi = MultiIndex.from_arrays(
+            [[1, 2, 2], idx],
+            names=["first", "second"],
+        )
+        # Sanity check on the expected index itself before comparing frames.
+        assert mi.levels[1].dtype == object
+
+        expected = DataFrame(
+            [["a", "a"], ["b", np.nan], [np.nan, "b"]],
+            index=mi,
+        )
+        result = concat([s, s2], axis=1)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_NaT_series(self):
+        """Concat datetime Series with all-NaT Series, with and without a time zone."""
+        # GH 11693
+        # test for merging NaT series with datetime series.
+        x = Series(
+            date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern")
+        )
+        y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]")
+        expected = Series([x[0], x[1], pd.NaT, pd.NaT])
+
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # all NaT with tz
+        expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]")
+        result = concat([y, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # without tz
+        x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h"))
+        y = Series(date_range("20151124 10:00", "20151124 11:00", freq="1h"))
+        # Overwrite every value of ``y`` in place with NaT.
+        y[:] = pd.NaT
+        expected = Series([x[0], x[1], pd.NaT, pd.NaT])
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # all NaT without tz
+        # ``x`` is now overwritten too, so both inputs are all-NaT.
+        x[:] = pd.NaT
+        expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns]")
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("tz", [None, "UTC"])
+    def test_concat_NaT_dataframes(self, tz):
+        """Row-wise concat of an all-NaT frame with a Timestamp frame sharing ``tz``."""
+        # GH 12396
+
+        first = DataFrame([[pd.NaT], [pd.NaT]])
+        # Localize the NaT column to the parametrized tz (None or "UTC").
+        first = first.apply(lambda x: x.dt.tz_localize(tz))
+        second = DataFrame(
+            [[Timestamp("2015/01/01", tz=tz)], [Timestamp("2016/01/01", tz=tz)]],
+            index=[2, 3],
+        )
+        expected = DataFrame(
+            [
+                pd.NaT,
+                pd.NaT,
+                Timestamp("2015/01/01", tz=tz),
+                Timestamp("2016/01/01", tz=tz),
+            ]
+        )
+
+        result = concat([first, second], axis=0)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("tz1", [None, "UTC"])
+    @pytest.mark.parametrize("tz2", [None, "UTC"])
+    @pytest.mark.parametrize("s", [pd.NaT, Timestamp("20150101")])
+    def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
+        """Row-wise concat of an all-NaT frame (tz1) with a one-row frame (tz2)."""
+        # GH 12396
+
+        # ``first`` is all-NaT and localized to tz1; ``second`` holds the single
+        # value ``s`` localized to tz2. Each tz is either None or "UTC".
+        first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1))
+        second = DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2))
+
+        result = concat([first, second], axis=0)
+        # The expected index repeats label 0 because the inputs keep their own labels.
+        expected = DataFrame(Series([pd.NaT, pd.NaT, s], index=[0, 1, 0]))
+        expected = expected.apply(lambda x: x.dt.tz_localize(tz2))
+        # When the two time zones differ, the expected frame is cast to object.
+        if tz1 != tz2:
+            expected = expected.astype(object)
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("tz1", [None, "UTC"])
+    @pytest.mark.parametrize("tz2", [None, "UTC"])
+    def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
+        """Column-wise concat of all-NaT frames localized to tz1 and tz2."""
+        # GH 12396
+
+        # ``first`` has two rows, ``second`` one row under column label 1.
+        first = DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1))
+        second = DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1])
+        # Expected: each column is two NaT rows localized to its own tz.
+        expected = DataFrame(
+            {
+                0: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1),
+                1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2),
+            }
+        )
+        result = concat([first, second], axis=1)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("tz1", [None, "UTC"])
+    @pytest.mark.parametrize("tz2", [None, "UTC"])
+    def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
+        """Concat an all-NaT Series (tz1) with a Timestamp DataFrame (tz2)."""
+        # GH 12396
+
+        # ``first`` is an all-NaT Series localized to tz1 (None or "UTC").
+        first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
+        second = DataFrame(
+            [
+                [Timestamp("2015/01/01", tz=tz2)],
+                [Timestamp("2016/01/01", tz=tz2)],
+            ],
+            index=[2, 3],
+        )
+
+        expected = DataFrame(
+            [
+                pd.NaT,
+                pd.NaT,
+                Timestamp("2015/01/01", tz=tz2),
+                Timestamp("2016/01/01", tz=tz2),
+            ]
+        )
+        # When the two time zones differ, the expected frame is cast to object.
+        if tz1 != tz2:
+            expected = expected.astype(object)
+
+        result = concat([first, second])
+        tm.assert_frame_equal(result, expected)
+
+
+class TestTimezoneConcat:
+    def test_concat_tz_series(self):
+        """Concat tz-aware data with naive, object and same-tz data; check dtypes."""
+        # gh-11755: tz and no tz
+        x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
+        y = Series(date_range("2012-01-01", "2012-01-02"))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # gh-11887: concat tz and object
+        x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
+        y = Series(["a", "b"])
+        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # see gh-12217 and gh-12306
+        # Concatenating two UTC times
+        # The remaining scenarios assert only the dtype of result column 0.
+        first = DataFrame([[datetime(2016, 1, 1)]])
+        first[0] = first[0].dt.tz_localize("UTC")
+
+        second = DataFrame([[datetime(2016, 1, 2)]])
+        second[0] = second[0].dt.tz_localize("UTC")
+
+        result = concat([first, second])
+        assert result[0].dtype == "datetime64[ns, UTC]"
+
+        # Concatenating two London times
+        first = DataFrame([[datetime(2016, 1, 1)]])
+        first[0] = first[0].dt.tz_localize("Europe/London")
+
+        second = DataFrame([[datetime(2016, 1, 2)]])
+        second[0] = second[0].dt.tz_localize("Europe/London")
+
+        result = concat([first, second])
+        assert result[0].dtype == "datetime64[ns, Europe/London]"
+
+        # Concatenating 2+1 London times
+        first = DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
+        first[0] = first[0].dt.tz_localize("Europe/London")
+
+        second = DataFrame([[datetime(2016, 1, 3)]])
+        second[0] = second[0].dt.tz_localize("Europe/London")
+
+        result = concat([first, second])
+        assert result[0].dtype == "datetime64[ns, Europe/London]"
+
+        # Concat'ing 1+2 London times
+        first = DataFrame([[datetime(2016, 1, 1)]])
+        first[0] = first[0].dt.tz_localize("Europe/London")
+
+        second = DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
+        second[0] = second[0].dt.tz_localize("Europe/London")
+
+        result = concat([first, second])
+        assert result[0].dtype == "datetime64[ns, Europe/London]"
+
+    def test_concat_tz_series_tzlocal(self):
+        """Concat two Series of Timestamps that use dateutil's tzlocal()."""
+        # see gh-13583
+        # Each Timestamp gets its own freshly constructed tzlocal() instance.
+        x = [
+            Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()),
+            Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()),
+        ]
+        y = [
+            Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()),
+            Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()),
+        ]
+
+        result = concat([Series(x), Series(y)], ignore_index=True)
+        tm.assert_series_equal(result, Series(x + y))
+        assert result.dtype == "datetime64[ns, tzlocal()]"
+
+    def test_concat_tz_series_with_datetimelike(self):
+        """Concat a tz-aware Series with Timedelta and Period Series; expect object."""
+        # see gh-12620: tz and timedelta
+        x = [
+            Timestamp("2011-01-01", tz="US/Eastern"),
+            Timestamp("2011-02-01", tz="US/Eastern"),
+        ]
+        y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]
+        result = concat([Series(x), Series(y)], ignore_index=True)
+        tm.assert_series_equal(result, Series(x + y, dtype="object"))
+
+        # tz and period
+        # ``x`` is reused; only ``y`` changes to monthly Periods.
+        y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")]
+        result = concat([Series(x), Series(y)], ignore_index=True)
+        tm.assert_series_equal(result, Series(x + y, dtype="object"))
+
+    def test_concat_tz_frame(self):
+        """Splitting a two-tz frame into columns and re-concatenating round-trips."""
+        # Scalars are broadcast over the 5-row index; columns use different zones.
+        df2 = DataFrame(
+            {
+                "A": Timestamp("20130102", tz="US/Eastern"),
+                "B": Timestamp("20130603", tz="CET"),
+            },
+            index=range(5),
+        )
+
+        # concat
+        df3 = concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
+        tm.assert_frame_equal(df2, df3)
+
+    def test_concat_multiple_tzs(self):
+        """Pairwise concat of naive, UTC and EST single-row frames."""
+        # GH#12467
+        # combining datetime tz-aware and naive DataFrames
+        ts1 = Timestamp("2015-01-01", tz=None)
+        ts2 = Timestamp("2015-01-01", tz="UTC")
+        ts3 = Timestamp("2015-01-01", tz="EST")
+
+        df1 = DataFrame({"time": [ts1]})
+        df2 = DataFrame({"time": [ts2]})
+        df3 = DataFrame({"time": [ts3]})
+
+        # naive + UTC: expected frame is built with dtype=object.
+        results = concat([df1, df2]).reset_index(drop=True)
+        expected = DataFrame({"time": [ts1, ts2]}, dtype=object)
+        tm.assert_frame_equal(results, expected)
+
+        # naive + EST: expected frame is built with dtype=object.
+        results = concat([df1, df3]).reset_index(drop=True)
+        expected = DataFrame({"time": [ts1, ts3]}, dtype=object)
+        tm.assert_frame_equal(results, expected)
+
+        # UTC + EST: expected frame is built without an explicit dtype.
+        results = concat([df2, df3]).reset_index(drop=True)
+        expected = DataFrame({"time": [ts2, ts3]})
+        tm.assert_frame_equal(results, expected)
+
+    @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning")
+    def test_concat_multiindex_with_tz(self):
+        """Concat a frame with itself when its MultiIndex has a tz-aware level."""
+        # GH 6606
+        df = DataFrame(
+            {
+                "dt": [
+                    datetime(2014, 1, 1),
+                    datetime(2014, 1, 2),
+                    datetime(2014, 1, 3),
+                ],
+                "b": ["A", "B", "C"],
+                "c": [1, 2, 3],
+                "d": [4, 5, 6],
+            }
+        )
+        # Convert each datetime to a US/Pacific Timestamp, then index by (dt, b).
+        df["dt"] = df["dt"].apply(lambda d: Timestamp(d, tz="US/Pacific"))
+        df = df.set_index(["dt", "b"])
+
+        # Expected index levels are the original values repeated twice.
+        exp_idx1 = DatetimeIndex(
+            ["2014-01-01", "2014-01-02", "2014-01-03"] * 2, tz="US/Pacific", name="dt"
+        )
+        exp_idx2 = Index(["A", "B", "C"] * 2, name="b")
+        exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
+        expected = DataFrame(
+            {"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, index=exp_idx, columns=["c", "d"]
+        )
+
+        result = concat([df, df])
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_tz_not_aligned(self):
+        """Concat tz-aware frames where one lacks a column the other has."""
+        # GH#22796
+        ts = pd.to_datetime([1, 2]).tz_localize("UTC")
+        a = DataFrame({"A": ts})
+        b = DataFrame({"A": ts, "B": ts})
+        result = concat([a, b], sort=True, ignore_index=True)
+        # Column "B" is missing from ``a``, so its first two rows are NaT.
+        expected = DataFrame(
+            {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)}
+        )
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "t1",
+        [
+            "2015-01-01",
+            # The NaT case is marked as an expected failure (see reason below).
+            pytest.param(
+                pd.NaT,
+                marks=pytest.mark.xfail(
+                    reason="GH23037 incorrect dtype when concatenating"
+                ),
+            ),
+        ],
+    )
+    def test_concat_tz_NaT(self, t1):
+        """Concat a two-column and a one-column UTC frame; the gap becomes NaT."""
+        # GH#22796
+        # Concatenating tz-aware multicolumn DataFrames
+        ts1 = Timestamp(t1, tz="UTC")
+        ts2 = Timestamp("2015-01-01", tz="UTC")
+        ts3 = Timestamp("2015-01-01", tz="UTC")
+
+        df1 = DataFrame([[ts1, ts2]])
+        df2 = DataFrame([[ts3]])
+
+        result = concat([df1, df2])
+        # Both inputs have row label 0, hence the repeated index [0, 0].
+        expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0])
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_tz_with_empty(self):
+        """Concat a tz-aware frame with an empty DataFrame; the frame is unchanged."""
+        # GH 9188
+        result = concat(
+            [DataFrame(date_range("2000", periods=1, tz="UTC")), DataFrame()]
+        )
+        expected = DataFrame(date_range("2000", periods=1, tz="UTC"))
+        tm.assert_frame_equal(result, expected)
+
+
+class TestPeriodConcat:
+    def test_concat_period_series(self):
+        """Two daily-frequency Period Series concatenate to a Period[D] Series."""
+        first = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
+        second = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D"))
+
+        result = concat([first, second], ignore_index=True)
+
+        expected = Series(
+            [first[0], first[1], second[0], second[1]], dtype="Period[D]"
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_period_multiple_freq_series(self):
+        """Period Series with different frequencies (D and M) concat to object."""
+        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
+        y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M"))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        assert result.dtype == "object"
+
+    def test_concat_period_other_series(self):
+        """A Period[D] Series concatenated with other kinds of Series gives object."""
+        # Period with a different frequency (same date strings, freq="M").
+        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
+        y = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M"))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        assert result.dtype == "object"
+
+        # non-period
+        # Period with a DatetimeIndex-backed Series.
+        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
+        y = Series(DatetimeIndex(["2015-11-01", "2015-12-01"]))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        assert result.dtype == "object"
+
+        # Period with a Series of plain strings.
+        x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
+        y = Series(["A", "B"])
+        expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        assert result.dtype == "object"
+
+
+def test_concat_timedelta64_block():
+    """Row-wise concat of a timedelta64 frame with itself reproduces it twice."""
+    seconds = to_timedelta(np.arange(10), unit="s")
+    frame = DataFrame({"time": seconds})
+
+    doubled = concat([frame, frame])
+
+    # The first and second halves must each match the input frame.
+    for half in (doubled.iloc[:10], doubled.iloc[10:]):
+        tm.assert_frame_equal(half, frame)
+
+
+def test_concat_multiindex_datetime_nat():
+    """Column-wise concat of frames whose MultiIndex tuples contain NaT (GH#44900)."""
+    short_index = MultiIndex.from_tuples([(1, pd.NaT)])
+    long_index = MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)])
+    left = DataFrame({"a": 1}, index=short_index)
+    right = DataFrame({"b": 2}, index=long_index)
+
+    result = concat([left, right], axis="columns")
+
+    expected = DataFrame(
+        {"a": [1.0, np.nan], "b": 2},
+        MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]),
+    )
+    tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/concat/test_empty.py
+++ b/dist/client/pandas/tests/reshape/concat/test_empty.py
@@ -0,0 +1,283 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    Series,
+    concat,
+    date_range,
+)
+import pandas._testing as tm
+
+
+class TestEmptyConcat:
+    def test_handle_empty_objects(self, sort):
+        """Empty frames in the input list must not change the concat result."""
+        # NOTE(review): ``sort`` is presumably a pytest fixture defined outside
+        # this module (e.g. in a conftest) -- confirm.
+        df = DataFrame(np.random.randn(10, 4), columns=list("abcd"))
+
+        baz = df[:5].copy()
+        baz["foo"] = "bar"
+        # Zero-row slice of ``df``.
+        empty = df[5:5]
+
+        frames = [baz, empty, empty, df[5:]]
+        concatted = concat(frames, axis=0, sort=sort)
+
+        # Expected: ``df`` plus an object "foo" column set to "bar" on labels 0-4.
+        expected = df.reindex(columns=["a", "b", "c", "d", "foo"])
+        expected["foo"] = expected["foo"].astype("O")
+        expected.loc[0:4, "foo"] = "bar"
+
+        tm.assert_frame_equal(concatted, expected)
+
+        # empty as first element with time series
+        # GH3259
+        df = DataFrame(
+            {"A": range(10000)}, index=date_range("20130101", periods=10000, freq="s")
+        )
+        empty = DataFrame()
+        # The empty frame is tried in both positions and along both axes.
+        result = concat([df, empty], axis=1)
+        tm.assert_frame_equal(result, df)
+        result = concat([empty, df], axis=1)
+        tm.assert_frame_equal(result, df)
+
+        result = concat([df, empty])
+        tm.assert_frame_equal(result, df)
+        result = concat([empty, df])
+        tm.assert_frame_equal(result, df)
+
+    def test_concat_empty_series(self):
+        """Concat a non-empty Series with an empty float64 Series on both axes."""
+        # GH 11082
+        # axis=1: the empty Series becomes an all-NaN column named "y".
+        s1 = Series([1, 2, 3], name="x")
+        s2 = Series(name="y", dtype="float64")
+        res = concat([s1, s2], axis=1)
+        exp = DataFrame(
+            {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]},
+            index=Index([0, 1, 2], dtype="O"),
+        )
+        tm.assert_frame_equal(res, exp)
+
+        # axis=0: the expected result holds only the values of ``s1``.
+        s1 = Series([1, 2, 3], name="x")
+        s2 = Series(name="y", dtype="float64")
+        res = concat([s1, s2], axis=0)
+        # name will be reset
+        exp = Series([1, 2, 3])
+        tm.assert_series_equal(res, exp)
+
+        # empty Series with no name
+        # The unnamed Series is expected under column label 0.
+        s1 = Series([1, 2, 3], name="x")
+        s2 = Series(name=None, dtype="float64")
+        res = concat([s1, s2], axis=1)
+        exp = DataFrame(
+            {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
+            columns=["x", 0],
+            index=Index([0, 1, 2], dtype="O"),
+        )
+        tm.assert_frame_equal(res, exp)
+
+    @pytest.mark.parametrize("tz", [None, "UTC"])
+    @pytest.mark.parametrize("values", [[], [1, 2, 3]])
+    def test_concat_empty_series_timelike(self, tz, values):
+        """Column-wise concat of an empty datetime Series with another Series."""
+        # GH 18447
+
+        first = Series([], dtype="M8[ns]").dt.tz_localize(tz)
+        # An explicit float64 dtype is passed only when ``values`` is empty.
+        dtype = None if values else np.float64
+        second = Series(values, dtype=dtype)
+
+        # Column 0 is expected to be NaT for every row of ``values``.
+        expected = DataFrame(
+            {
+                0: Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz),
+                1: values,
+            }
+        )
+        result = concat([first, second], axis=1)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "left,right,expected",
+        [
+            # booleans
+            (np.bool_, np.int32, np.int32),
+            (np.bool_, np.float32, np.object_),
+            # datetime-like
+            ("m8[ns]", np.bool_, np.object_),
+            ("m8[ns]", np.int64, np.object_),
+            ("M8[ns]", np.bool_, np.object_),
+            ("M8[ns]", np.int64, np.object_),
+            # categorical
+            ("category", "category", "category"),
+            ("category", "object", "object"),
+        ],
+    )
+    def test_concat_empty_series_dtypes(self, left, right, expected):
+        """Result dtype when concatenating two empty Series of the given dtypes."""
+        # A FutureWarning is expected only when one side is np.bool_ and the
+        # expected result is not np.object_.
+        warn = None
+        if (left is np.bool_ or right is np.bool_) and expected is not np.object_:
+            warn = FutureWarning
+        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
+            # GH#39817
+            result = concat([Series(dtype=left), Series(dtype=right)])
+        assert result.dtype == expected
+
+    @pytest.mark.parametrize(
+        "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]
+    )
+    def test_concat_empty_series_dtypes_match_roundtrips(self, dtype):
+        """Concat of one or two empty Series of the same dtype keeps that dtype."""
+        dtype = np.dtype(dtype)
+
+        # Single-element list.
+        result = concat([Series(dtype=dtype)])
+        assert result.dtype == dtype
+
+        # Two Series of the same dtype.
+        result = concat([Series(dtype=dtype), Series(dtype=dtype)])
+        assert result.dtype == dtype
+
+    def test_concat_empty_series_dtypes_roundtrips(self):
+        """Check the result dtype kind for every ordered pair of distinct dtypes.
+
+        Two empty Series of different dtypes are concatenated and the ``kind``
+        of the result dtype is compared with the kind predicted by the local
+        helper functions below.
+        """
+        # round-tripping with self & like self
+        # Materialize the dtypes as a list: a bare ``map`` iterator is shared by
+        # both loops below and would be drained by the first pass of the inner
+        # loop, so only pairs led by the first dtype would ever be checked.
+        dtypes = [
+            np.dtype(name)
+            for name in ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]
+        ]
+
+        def int_result_type(dtype, dtype2):
+            # "i" if both kinds are in {i, u, b} and one is signed; "u" if both
+            # are in {u, b} and one is unsigned; otherwise None.
+            typs = {dtype.kind, dtype2.kind}
+            if not len(typs - {"i", "u", "b"}) and (
+                dtype.kind == "i" or dtype2.kind == "i"
+            ):
+                return "i"
+            elif not len(typs - {"u", "b"}) and (
+                dtype.kind == "u" or dtype2.kind == "u"
+            ):
+                return "u"
+            return None
+
+        def float_result_type(dtype, dtype2):
+            # "f" if both kinds are in {f, i, u} and one is float; else None.
+            typs = {dtype.kind, dtype2.kind}
+            if not len(typs - {"f", "i", "u"}) and (
+                dtype.kind == "f" or dtype2.kind == "f"
+            ):
+                return "f"
+            return None
+
+        def get_result_type(dtype, dtype2):
+            # Try the float rule, then the integer rule, else fall back to object.
+            result = float_result_type(dtype, dtype2)
+            if result is not None:
+                return result
+            result = int_result_type(dtype, dtype2)
+            if result is not None:
+                return result
+            return "O"
+
+        for dtype in dtypes:
+            for dtype2 in dtypes:
+                # Identical dtypes are not checked by this test.
+                if dtype == dtype2:
+                    continue
+
+                expected = get_result_type(dtype, dtype2)
+                result = concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
+                assert result.kind == expected
+
+    def test_concat_empty_series_dtypes_triple(self):
+        """Empty datetime64, bool and int64 Series concatenate to object dtype."""
+        empties = [
+            Series(dtype="M8[ns]"),
+            Series(dtype=np.bool_),
+            Series(dtype=np.int64),
+        ]
+        result = concat(empties)
+        assert result.dtype == np.object_
+
+    def test_concat_empty_series_dtype_category_with_array(self):
+        """Empty categorical (from an empty array) plus empty float64 gives float64."""
+        # GH#18515
+        categorical = Series(np.array([]), dtype="category")
+        floats = Series(dtype="float64")
+
+        result = concat([categorical, floats])
+
+        assert result.dtype == "float64"
+
+    def test_concat_empty_series_dtypes_sparse(self):
+        """Result dtype when concatenating empty Sparse Series with other Series."""
+        # Sparse + Sparse
+        result = concat(
+            [
+                Series(dtype="float64").astype("Sparse"),
+                Series(dtype="float64").astype("Sparse"),
+            ]
+        )
+        assert result.dtype == "Sparse[float64]"
+
+        # Sparse + dense float64
+        result = concat(
+            [Series(dtype="float64").astype("Sparse"), Series(dtype="float64")]
+        )
+        expected = pd.SparseDtype(np.float64)
+        assert result.dtype == expected
+
+        # Sparse + dense object
+        result = concat(
+            [Series(dtype="float64").astype("Sparse"), Series(dtype="object")]
+        )
+        expected = pd.SparseDtype("object")
+        assert result.dtype == expected
+
+    def test_concat_empty_df_object_dtype(self):
+        """Concat a populated frame with an empty frame that shares its columns."""
+        # GH 9149
+        df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
+        # ``df_2`` has the same column labels but no rows.
+        df_2 = DataFrame(columns=df_1.columns)
+        result = concat([df_1, df_2], axis=0)
+        # The expected result is ``df_1`` cast to object dtype.
+        expected = df_1.astype(object)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_empty_dataframe_dtypes(self):
+        """Per-column dtypes when concatenating empty frames."""
+        # Build an empty frame with bool, int32 and float64 columns.
+        df = DataFrame(columns=list("abc"))
+        df["a"] = df["a"].astype(np.bool_)
+        df["b"] = df["b"].astype(np.int32)
+        df["c"] = df["c"].astype(np.float64)
+
+        # Same dtypes on both sides: each column keeps its dtype.
+        result = concat([df, df])
+        assert result["a"].dtype == np.bool_
+        assert result["b"].dtype == np.int32
+        assert result["c"].dtype == np.float64
+
+        # Second frame is all float64.
+        result = concat([df, df.astype(np.float64)])
+        assert result["a"].dtype == np.object_
+        assert result["b"].dtype == np.float64
+        assert result["c"].dtype == np.float64
+
+    def test_concat_inner_join_empty(self):
+        """Column-wise concat with an empty frame under inner and outer joins."""
+        # GH 15328
+        df_empty = DataFrame()
+        df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64")
+        df_expected = DataFrame({"a": []}, index=[], dtype="int64")
+
+        # inner -> zero-row frame with column "a"; outer -> ``df_a`` itself.
+        for how, expected in [("inner", df_expected), ("outer", df_a)]:
+            result = concat([df_a, df_empty], axis=1, join=how)
+            tm.assert_frame_equal(result, expected)
+
+    def test_empty_dtype_coerce(self):
+        """Concat of frames with an all-None column keeps the first frame's dtypes."""
+
+        # xref to #12411
+        # xref to #12045
+        # xref to #11594
+        # see below
+
+        # 10571
+        df1 = DataFrame(data=[[1, None], [2, None]], columns=["a", "b"])
+        df2 = DataFrame(data=[[3, None], [4, None]], columns=["a", "b"])
+        result = concat([df1, df2])
+        # Only the dtypes are compared, not the values.
+        expected = df1.dtypes
+        tm.assert_series_equal(result.dtypes, expected)
+
+    def test_concat_empty_dataframe(self):
+        """Concat of zero-row frames with different column sets."""
+        # 39037
+        # Partially overlapping columns: the expected columns are a, b, c.
+        df1 = DataFrame(columns=["a", "b"])
+        df2 = DataFrame(columns=["b", "c"])
+        result = concat([df1, df2, df1])
+        expected = DataFrame(columns=["a", "b", "c"])
+        tm.assert_frame_equal(result, expected)
+
+        # One frame's columns are a subset of the other's.
+        df3 = DataFrame(columns=["a", "b"])
+        df4 = DataFrame(columns=["b"])
+        result = concat([df3, df4])
+        expected = DataFrame(columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_empty_dataframe_different_dtypes(self):
+        """Zero-row slices of typed frames keep their column dtypes after concat."""
+        # 39037
+        df1 = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+        df2 = DataFrame({"a": [1, 2, 3]})
+
+        # ``[:0]`` takes zero rows from each frame.
+        result = concat([df1[:0], df2[:0]])
+        assert result["a"].dtype == np.int64
+        assert result["b"].dtype == np.object_
--- a/dist/client/pandas/tests/reshape/concat/test_index.py
+++ b/dist/client/pandas/tests/reshape/concat/test_index.py
@@ -0,0 +1,319 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    concat,
+)
+import pandas._testing as tm
+
+
+class TestIndexConcat:  # concat tests focused on the row/column labels of the result
+    def test_concat_ignore_index(self, sort):  # `sort` comes from outside this class (presumably a pytest fixture)
+        frame1 = DataFrame(
+            {"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]}
+        )
+        frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
+        frame1.index = Index(["x", "y", "z"])
+        frame2.index = Index(["x", "y", "q"])
+
+        v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort)
+
+        nan = np.nan
+        expected = DataFrame(  # no column labels given: ignore_index=True should renumber them
+            [
+                [nan, nan, nan, 4.3],
+                ["a", 1, 4.5, 5.2],
+                ["b", 2, 3.2, 2.2],
+                ["c", 3, 1.2, nan],
+            ],
+            index=Index(["q", "x", "y", "z"]),
+        )
+        if not sort:  # unsorted result keeps frame1's rows first, then the new label "q"
+            expected = expected.loc[["x", "y", "z", "q"]]
+
+        tm.assert_frame_equal(v1, expected)
+
+    @pytest.mark.parametrize(
+        "name_in1,name_in2,name_in3,name_out",
+        [
+            ("idx", "idx", "idx", "idx"),
+            ("idx", "idx", None, None),
+            ("idx", None, None, None),
+            ("idx1", "idx2", None, None),
+            ("idx1", "idx1", "idx2", None),
+            ("idx1", "idx2", "idx3", None),
+            (None, None, None, None),
+        ],
+    )
+    def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out):
+        # GH13475: the result index keeps a name only when all three input names agree
+        indices = [
+            Index(["a", "b", "c"], name=name_in1),
+            Index(["b", "c", "d"], name=name_in2),
+            Index(["c", "d", "e"], name=name_in3),
+        ]
+        frames = [
+            DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"])
+        ]
+        result = concat(frames, axis=1)
+
+        exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out)
+        expected = DataFrame(
+            {
+                "x": [0, 1, 2, np.nan, np.nan],
+                "y": [np.nan, 0, 1, 2, np.nan],
+                "z": [np.nan, np.nan, 0, 1, 2],
+            },
+            index=exp_ind,
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_rename_index(self):  # names=["lvl0", "lvl1"] should name both levels of the keyed index
+        a = DataFrame(
+            np.random.rand(3, 3),
+            columns=list("ABC"),
+            index=Index(list("abc"), name="index_a"),
+        )
+        b = DataFrame(
+            np.random.rand(3, 3),
+            columns=list("ABC"),
+            index=Index(list("abc"), name="index_b"),
+        )
+
+        result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"])
+
+        exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"])  # expectation: name level 1 by hand
+        names = list(exp.index.names)
+        names[1] = "lvl1"
+        exp.index.set_names(names, inplace=True)
+
+        tm.assert_frame_equal(result, exp)
+        assert result.index.names == exp.index.names
+
+    def test_concat_copy_index_series(self, axis):  # `axis` comes from outside this class (presumably a pytest fixture)
+        # GH 29879: with copy=True the result must not reuse the input's index object
+        ser = Series([1, 2])
+        comb = concat([ser, ser], axis=axis, copy=True)
+        assert comb.index is not ser.index
+
+    def test_concat_copy_index_frame(self, axis):
+        # GH 29879: with copy=True neither axis object of the result is the input's
+        df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
+        comb = concat([df, df], axis=axis, copy=True)
+        assert comb.index is not df.index
+        assert comb.columns is not df.columns
+
+    def test_default_index(self):
+        # is_series and ignore_index
+        s1 = Series([1, 2, 3], name="x")
+        s2 = Series([4, 5, 6], name="y")
+        res = concat([s1, s2], axis=1, ignore_index=True)
+        assert isinstance(res.columns, pd.RangeIndex)
+        exp = DataFrame([[1, 4], [2, 5], [3, 6]])
+        # use check_index_type=True to check that the result has a
+        # RangeIndex (default index)
+        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
+
+        # is_series and all inputs have no names
+        s1 = Series([1, 2, 3])
+        s2 = Series([4, 5, 6])
+        res = concat([s1, s2], axis=1, ignore_index=False)
+        assert isinstance(res.columns, pd.RangeIndex)
+        exp = DataFrame([[1, 4], [2, 5], [3, 6]])
+        exp.columns = pd.RangeIndex(2)
+        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
+
+        # is_dataframe and ignore_index
+        df1 = DataFrame({"A": [1, 2], "B": [5, 6]})
+        df2 = DataFrame({"A": [3, 4], "B": [7, 8]})
+
+        res = concat([df1, df2], axis=0, ignore_index=True)  # row labels are dropped
+        exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"])
+        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
+
+        res = concat([df1, df2], axis=1, ignore_index=True)  # column labels are dropped
+        exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
+        tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
+
+    def test_dups_index(self):
+        # GH 4771: concat and _append on frames with duplicate column labels
+
+        # single dtypes
+        df = DataFrame(
+            np.random.randint(0, 10, size=40).reshape(10, 4),
+            columns=["A", "A", "C", "C"],
+        )
+
+        result = concat([df, df], axis=1)
+        tm.assert_frame_equal(result.iloc[:, :4], df)
+        tm.assert_frame_equal(result.iloc[:, 4:], df)
+
+        result = concat([df, df], axis=0)
+        tm.assert_frame_equal(result.iloc[:10], df)
+        tm.assert_frame_equal(result.iloc[10:], df)
+
+        # multi dtypes
+        df = concat(
+            [
+                DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),
+                DataFrame(
+                    np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
+                ),
+            ],
+            axis=1,
+        )
+
+        result = concat([df, df], axis=1)
+        tm.assert_frame_equal(result.iloc[:, :6], df)
+        tm.assert_frame_equal(result.iloc[:, 6:], df)
+
+        result = concat([df, df], axis=0)
+        tm.assert_frame_equal(result.iloc[:10], df)
+        tm.assert_frame_equal(result.iloc[10:], df)
+
+        # append: row slices stitched back together must reproduce df
+        result = df.iloc[0:8, :]._append(df.iloc[8:])
+        tm.assert_frame_equal(result, df)
+
+        result = df.iloc[0:8, :]._append(df.iloc[8:9])._append(df.iloc[9:10])
+        tm.assert_frame_equal(result, df)
+
+        expected = concat([df, df], axis=0)
+        result = df._append(df)
+        tm.assert_frame_equal(result, expected)
+
+
+class TestMultiIndexConcat:  # concat tests whose inputs or results carry a MultiIndex
+    def test_concat_multiindex_with_keys(self, multiindex_dataframe_random_data):  # argument is defined outside SOURCE
+        frame = multiindex_dataframe_random_data
+        index = frame.index
+        result = concat([frame, frame], keys=[0, 1], names=["iteration"])
+
+        assert result.index.names == ("iteration",) + index.names  # new level goes in front
+        tm.assert_frame_equal(result.loc[0], frame)
+        tm.assert_frame_equal(result.loc[1], frame)
+        assert result.index.nlevels == 3
+
+    def test_concat_multiindex_with_none_in_index_names(self):
+        # GH 15787: an unnamed (None) level must survive concat with keys
+        index = MultiIndex.from_product([[1], range(5)], names=["level1", None])
+        df = DataFrame({"col": range(5)}, index=index, dtype=np.int32)
+
+        result = concat([df, df], keys=[1, 2], names=["level2"])
+        index = MultiIndex.from_product(
+            [[1, 2], [1], range(5)], names=["level2", "level1", None]
+        )
+        expected = DataFrame({"col": list(range(5)) * 2}, index=index, dtype=np.int32)
+        tm.assert_frame_equal(result, expected)
+
+        result = concat([df, df[:2]], keys=[1, 2], names=["level2"])  # pieces of unequal length
+        level2 = [1] * 5 + [2] * 2
+        level1 = [1] * 7
+        no_name = list(range(5)) + list(range(2))
+        tuples = list(zip(level2, level1, no_name))
+        index = MultiIndex.from_tuples(tuples, names=["level2", "level1", None])
+        expected = DataFrame({"col": no_name}, index=index, dtype=np.int32)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_multiindex_rangeindex(self):
+        # GH13542
+        # when multi-index levels are RangeIndex objects
+        # there is a bug in concat with objects of len 1
+
+        df = DataFrame(np.random.randn(9, 2))
+        df.index = MultiIndex(
+            levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
+            codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)],
+        )
+
+        res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])  # second piece has a single row
+        exp = df.iloc[[2, 3, 4, 5], :]
+        tm.assert_frame_equal(res, exp)
+
+    def test_concat_multiindex_dfs_with_deepcopy(self):
+        # GH 9967: a dict of frames and its deepcopy must concat to the same result
+        from copy import deepcopy
+
+        example_multiindex1 = MultiIndex.from_product([["a"], ["b"]])
+        example_dataframe1 = DataFrame([0], index=example_multiindex1)
+
+        example_multiindex2 = MultiIndex.from_product([["a"], ["c"]])
+        example_dataframe2 = DataFrame([1], index=example_multiindex2)
+
+        example_dict = {"s1": example_dataframe1, "s2": example_dataframe2}
+        expected_index = MultiIndex(  # dict keys become the outer level, named via names=
+            levels=[["s1", "s2"], ["a"], ["b", "c"]],
+            codes=[[0, 1], [0, 0], [0, 1]],
+            names=["testname", None, None],
+        )
+        expected = DataFrame([[0], [1]], index=expected_index)
+        result_copy = concat(deepcopy(example_dict), names=["testname"])
+        tm.assert_frame_equal(result_copy, expected)
+        result_no_copy = concat(example_dict, names=["testname"])
+        tm.assert_frame_equal(result_no_copy, expected)
+
+    @pytest.mark.parametrize(
+        "mi1_list",
+        [
+            [["a"], range(2)],
+            [["b"], np.arange(2.0, 4.0)],
+            [["c"], ["A", "B"]],
+            [["d"], pd.date_range(start="2017", end="2018", periods=2)],
+        ],
+    )
+    @pytest.mark.parametrize(
+        "mi2_list",
+        [
+            [["a"], range(2)],
+            [["b"], np.arange(2.0, 4.0)],
+            [["c"], ["A", "B"]],
+            [["d"], pd.date_range(start="2017", end="2018", periods=2)],
+        ],
+    )
+    def test_concat_with_various_multiindex_dtypes(
+        self, mi1_list: list, mi2_list: list
+    ):
+        # GitHub #23478: axis=1 concat of MultiIndex columns with differing level dtypes
+        mi1 = MultiIndex.from_product(mi1_list)
+        mi2 = MultiIndex.from_product(mi2_list)
+
+        df1 = DataFrame(np.zeros((1, len(mi1))), columns=mi1)
+        df2 = DataFrame(np.zeros((1, len(mi2))), columns=mi2)
+
+        if mi1_list[0] == mi2_list[0]:  # same parametrized entry on both sides: levels are reused
+            expected_mi = MultiIndex(
+                levels=[mi1_list[0], list(mi1_list[1])],
+                codes=[[0, 0, 0, 0], [0, 1, 0, 1]],
+            )
+        else:
+            expected_mi = MultiIndex(
+                levels=[
+                    mi1_list[0] + mi2_list[0],
+                    list(mi1_list[1]) + list(mi2_list[1]),
+                ],
+                codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
+            )
+
+        expected_df = DataFrame(np.zeros((1, len(expected_mi))), columns=expected_mi)
+
+        with tm.assert_produces_warning(None):  # the concat itself must not warn
+            result_df = concat((df1, df2), axis=1)
+
+        tm.assert_frame_equal(expected_df, result_df)
+
+    def test_concat_range_index_result(self):
+        # GH#47501: the row index of the result must be exactly RangeIndex(0, 2)
+        df1 = DataFrame({"a": [1, 2]})
+        df2 = DataFrame({"b": [1, 2]})
+
+        result = concat([df1, df2], sort=True, axis=1)
+        expected = DataFrame({"a": [1, 2], "b": [1, 2]})
+        tm.assert_frame_equal(result, expected)
+        expected_index = pd.RangeIndex(0, 2)
+        tm.assert_index_equal(result.index, expected_index, exact=True)
--- a/dist/client/pandas/tests/reshape/concat/test_invalid.py
+++ b/dist/client/pandas/tests/reshape/concat/test_invalid.py
@@ -0,0 +1,56 @@
+from io import StringIO
+
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    concat,
+    read_csv,
+)
+import pandas._testing as tm
+
+
+class TestInvalidConcat:  # argument validation of concat, plus accepted non-list iterables
+    def test_concat_invalid(self):
+
+        # trying to concat an NDFrame with a non-NDFrame must raise TypeError
+        df1 = tm.makeCustomDataframe(10, 2)
+        for obj in [1, {}, [1, 2], (1, 2)]:
+
+            msg = (
+                f"cannot concatenate object of type '{type(obj)}'; "
+                "only Series and DataFrame objs are valid"
+            )
+            with pytest.raises(TypeError, match=msg):
+                concat([df1, obj])
+
+    def test_concat_invalid_first_argument(self):  # a bare DataFrame instead of an iterable must raise
+        df1 = tm.makeCustomDataframe(10, 2)
+        msg = (
+            "first argument must be an iterable of pandas "
+            'objects, you passed an object of type "DataFrame"'
+        )
+        with pytest.raises(TypeError, match=msg):
+            concat(df1)
+
+    def test_concat_generator_obj(self):
+        # generator ok though (the call only has to complete without raising)
+        concat(DataFrame(np.random.rand(5, 5)) for _ in range(3))
+
+    def test_concat_textreader_obj(self):
+        # text reader ok
+        # GH6583
+        data = """index,A,B,C,D
+                  foo,2,3,4,5
+                  bar,7,8,9,10
+                  baz,12,13,14,15
+                  qux,12,13,14,15
+                  foo2,12,13,14,15
+                  bar2,12,13,14,15
+               """
+
+        with read_csv(StringIO(data), chunksize=1) as reader:  # reader object is passed to concat directly
+            result = concat(reader, ignore_index=True)
+        expected = read_csv(StringIO(data))  # same text parsed in one go
+        tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/concat/test_series.py
+++ b/dist/client/pandas/tests/reshape/concat/test_series.py
@@ -0,0 +1,148 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    MultiIndex,
+    Series,
+    concat,
+    date_range,
+)
+import pandas._testing as tm
+
+
+class TestSeriesConcat:  # concat tests with Series inputs
+    def test_concat_series(self):  # pieces of a series must concat back to it; keys add an outer level
+
+        ts = tm.makeTimeSeries()
+        ts.name = "foo"
+
+        pieces = [ts[:5], ts[5:15], ts[15:]]
+
+        result = concat(pieces)
+        tm.assert_series_equal(result, ts)
+        assert result.name == ts.name
+
+        result = concat(pieces, keys=[0, 1, 2])
+        expected = ts.copy()
+
+        ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]"))
+
+        exp_codes = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))]  # outer code = piece number
+        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes)
+        expected.index = exp_index
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_empty_and_non_empty_series_regression(self):
+        # GH 18187 regression test: an empty object-dtype series must not change the result
+        s1 = Series([1])
+        s2 = Series([], dtype=object)
+
+        expected = s1
+        result = concat([s1, s2])
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_series_axis1(self):  # must match DataFrame(pieces).T, with and without keys
+        ts = tm.makeTimeSeries()
+
+        pieces = [ts[:-2], ts[2:], ts[2:-2]]
+
+        result = concat(pieces, axis=1)
+        expected = DataFrame(pieces).T
+        tm.assert_frame_equal(result, expected)
+
+        result = concat(pieces, keys=["A", "B", "C"], axis=1)
+        expected = DataFrame(pieces, index=["A", "B", "C"]).T
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_series_axis1_preserves_series_names(self):
+        # preserve series names, #2489
+        s = Series(np.random.randn(5), name="A")
+        s2 = Series(np.random.randn(5), name="B")
+
+        result = concat([s, s2], axis=1)
+        expected = DataFrame({"A": s, "B": s2})
+        tm.assert_frame_equal(result, expected)
+
+        s2.name = None  # the unnamed series is expected to get the column label 0
+        result = concat([s, s2], axis=1)
+        tm.assert_index_equal(result.columns, Index(["A", 0], dtype="object"))
+
+    def test_concat_series_axis1_with_reindex(self, sort):  # `sort` comes from outside this class (presumably a pytest fixture)
+        # must reindex, #2603
+        s = Series(np.random.randn(3), index=["c", "a", "b"], name="A")
+        s2 = Series(np.random.randn(4), index=["d", "a", "b", "c"], name="B")
+        result = concat([s, s2], axis=1, sort=sort)
+        expected = DataFrame({"A": s, "B": s2}, index=["c", "a", "b", "d"])
+        if sort:
+            expected = expected.sort_index()
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_series_axis1_names_applied(self):
+        # ensure names argument is not ignored on axis=1, #23490
+        s = Series([1, 2, 3])
+        s2 = Series([4, 5, 6])
+        result = concat([s, s2], axis=1, keys=["a", "b"], names=["A"])
+        expected = DataFrame(
+            [[1, 4], [2, 5], [3, 6]], columns=Index(["a", "b"], name="A")
+        )
+        tm.assert_frame_equal(result, expected)
+
+        result = concat([s, s2], axis=1, keys=[("a", 1), ("b", 2)], names=["A", "B"])  # tuple keys: MultiIndex columns
+        expected = DataFrame(
+            [[1, 4], [2, 5], [3, 6]],
+            columns=MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["A", "B"]),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_series_axis1_same_names_ignore_index(self):  # duplicate name "value" is replaced by 0, 1
+        dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1]
+        s1 = Series(np.random.randn(len(dates)), index=dates, name="value")
+        s2 = Series(np.random.randn(len(dates)), index=dates, name="value")
+
+        result = concat([s1, s2], axis=1, ignore_index=True)
+        expected = Index(range(2))
+
+        tm.assert_index_equal(result.columns, expected, exact=True)
+
+    @pytest.mark.parametrize(
+        "s1name,s2name", [(np.int64(190), (43, 0)), (190, (43, 0))]
+    )
+    def test_concat_series_name_npscalar_tuple(self, s1name, s2name):
+        # GH21015: a scalar name and a tuple name must concat without error, result unnamed
+        s1 = Series({"a": 1, "b": 2}, name=s1name)
+        s2 = Series({"c": 5, "d": 6}, name=s2name)
+        result = concat([s1, s2])
+        expected = Series({"a": 1, "b": 2, "c": 5, "d": 6})
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_series_partial_columns_names(self):
+        # GH10698: unnamed series are numbered 0, 1 next to the named one
+        foo = Series([1, 2], name="foo")
+        bar = Series([1, 2])
+        baz = Series([4, 5])
+
+        result = concat([foo, bar, baz], axis=1)
+        expected = DataFrame(
+            {"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1]
+        )
+        tm.assert_frame_equal(result, expected)
+
+        result = concat([foo, bar, baz], axis=1, keys=["red", "blue", "yellow"])  # keys override the names
+        expected = DataFrame(
+            {"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]},
+            columns=["red", "blue", "yellow"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+        result = concat([foo, bar, baz], axis=1, ignore_index=True)
+        expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_series_length_one_reversed(self, frame_or_series):  # argument is defined outside SOURCE
+        # GH39401: a reversed length-1 object must concat back equal to the original
+        obj = frame_or_series([100])
+        result = concat([obj.iloc[::-1]])
+        tm.assert_equal(result, obj)
--- a/dist/client/pandas/tests/reshape/concat/test_sort.py
+++ b/dist/client/pandas/tests/reshape/concat/test_sort.py
@@ -0,0 +1,100 @@
+import numpy as np
+
+import pandas as pd
+from pandas import DataFrame
+import pandas._testing as tm
+
+
+class TestConcatSort:  # behaviour of the `sort` argument of pd.concat
+    def test_concat_sorts_columns(self, sort):  # `sort` comes from outside this class (presumably a pytest fixture)
+        # GH-4588
+        df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
+        df2 = DataFrame({"a": [3, 4], "c": [5, 6]})
+
+        # for sort=True/None
+        expected = DataFrame(
+            {"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]},
+            columns=["a", "b", "c"],
+        )
+
+        if sort is False:
+            expected = expected[["b", "a", "c"]]
+
+        # no warning is expected for any value of sort
+        with tm.assert_produces_warning(None):
+            result = pd.concat([df1, df2], ignore_index=True, sort=sort)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_sorts_index(self, sort):
+        df1 = DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"])
+        df2 = DataFrame({"b": [1, 2]}, index=["a", "b"])
+
+        # For True/None
+        expected = DataFrame(
+            {"a": [2, 3, 1], "b": [1, 2, None]},
+            index=["a", "b", "c"],
+            columns=["a", "b"],
+        )
+        if sort is False:
+            expected = expected.loc[["c", "a", "b"]]
+
+        # No warning is expected; rows come out sorted unless sort is False
+        with tm.assert_produces_warning(None):
+            result = pd.concat([df1, df2], axis=1, sort=sort)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_inner_sort(self, sort):
+        # https://github.com/pandas-dev/pandas/pull/20613
+        df1 = DataFrame(
+            {"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"]
+        )
+        df2 = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4])
+
+        with tm.assert_produces_warning(None):
+            # unset sort should *not* warn for inner join
+            # since that never sorted
+            result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True)
+
+        expected = DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"])
+        if sort is True:  # only an explicit True reorders the shared columns
+            expected = expected[["a", "b"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_aligned_sort(self):
+        # GH-4588: sort=True must sort the columns even when both inputs share one column order
+        df = DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"])
+        result = pd.concat([df, df], sort=True, ignore_index=True)
+        expected = DataFrame(
+            {"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]},
+            columns=["a", "b", "c"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+        result = pd.concat(
+            [df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True
+        )
+        expected = expected[["b", "c"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_aligned_sort_does_not_raise(self):
+        # GH-4588
+        # We catch TypeErrors from sorting internally and do not re-raise.
+        df = DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"])  # int and str labels together
+        expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"])
+        result = pd.concat([df, df], ignore_index=True, sort=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_frame_with_sort_false(self):
+        # GH 43375: sort=False must keep the descending 2, 1 order of rows and columns
+        result = pd.concat(
+            [DataFrame({i: i}, index=[i]) for i in range(2, 0, -1)], sort=False
+        )
+        expected = DataFrame([[2, np.nan], [np.nan, 1]], index=[2, 1], columns=[2, 1])
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_concat_sort_none_warning(self):
+        # GH#41518: sort=None must emit a FutureWarning whose message matches "sort"
+        df = DataFrame({1: [1, 2], "a": [3, 4]})
+        with tm.assert_produces_warning(FutureWarning, match="sort"):
+            pd.concat([df, df], sort=None)
--- a/dist/client/pandas/tests/reshape/merge/init.py
+++ b/dist/client/pandas/tests/reshape/merge/init.py
--- a/dist/client/pandas/tests/reshape/merge/pycache/init.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/init.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/pycache/test_join.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/test_join.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/pycache/test_merge.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/test_merge.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/pycache/test_merge_asof.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/test_merge_asof.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/pycache/test_merge_cross.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/test_merge_cross.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/pycache/test_merge_index_as_string.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/test_merge_index_as_string.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/pycache/test_merge_ordered.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/test_merge_ordered.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/pycache/test_multi.cpython-310.pyc
+++ b/dist/client/pandas/tests/reshape/merge/pycache/test_multi.cpython-310.pyc
--- a/dist/client/pandas/tests/reshape/merge/test_join.py
+++ b/dist/client/pandas/tests/reshape/merge/test_join.py
@@ -0,0 +1,883 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    concat,
+    merge,
+)
+import pandas._testing as tm
+from pandas.tests.reshape.merge.test_merge import (
+    NGROUPS,
+    N,
+    get_test_data,
+)
+
+a_ = np.array  # module-level shorthand alias for np.array
+
+
+class TestJoin:
+    def setup_method(self, method):  # builds self.df, self.df2, self.target and self.source
+        # aggregate multiple columns
+        self.df = DataFrame(
+            {
+                "key1": get_test_data(),
+                "key2": get_test_data(),
+                "data1": np.random.randn(N),
+                "data2": np.random.randn(N),
+            }
+        )
+
+        # exclude a couple keys for fun (only rows with key2 > 1 are kept)
+        self.df = self.df[self.df["key2"] > 1]
+
+        self.df2 = DataFrame(  # second frame, sized N // 5
+            {
+                "key1": get_test_data(n=N // 5),
+                "key2": get_test_data(ngroups=NGROUPS // 2, n=N // 5),
+                "value": np.random.randn(N // 5),
+            }
+        )
+
+        index, data = tm.getMixedTypeDict()
+        self.target = DataFrame(data, index=index)
+
+        # Join on string value: source is indexed by data["C"] and holds copies of A and D
+        self.source = DataFrame(
+            {"MergedA": data["A"], "MergedD": data["D"]}, index=data["C"]
+        )
+
+    def test_left_outer_join(self):  # merge on "key2" and on all shared columns, validated by _check_join
+        joined_key2 = merge(self.df, self.df2, on="key2")  # NOTE(review): default `how` here vs how="left" below; confirm intended
+        _check_join(self.df, self.df2, joined_key2, ["key2"], how="left")
+
+        joined_both = merge(self.df, self.df2)  # no `on`: presumably joins on the shared key1/key2 columns
+        _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="left")
+
+    def test_right_outer_join(self):  # how="right" merges, validated by _check_join (defined outside SOURCE)
+        joined_key2 = merge(self.df, self.df2, on="key2", how="right")
+        _check_join(self.df, self.df2, joined_key2, ["key2"], how="right")
+
+        joined_both = merge(self.df, self.df2, how="right")  # no `on`: checked against key1 and key2
+        _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="right")
+
+    def test_full_outer_join(self):  # how="outer" merges, validated by _check_join (defined outside SOURCE)
+        joined_key2 = merge(self.df, self.df2, on="key2", how="outer")
+        _check_join(self.df, self.df2, joined_key2, ["key2"], how="outer")
+
+        joined_both = merge(self.df, self.df2, how="outer")  # no `on`: checked against key1 and key2
+        _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="outer")
+
+    def test_inner_join(self):  # how="inner" merges, validated by _check_join (defined outside SOURCE)
+        joined_key2 = merge(self.df, self.df2, on="key2", how="inner")
+        _check_join(self.df, self.df2, joined_key2, ["key2"], how="inner")
+
+        joined_both = merge(self.df, self.df2, how="inner")  # no `on`: checked against key1 and key2
+        _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="inner")
+
+    def test_handle_overlap(self):  # the non-key column "key1" exists on both sides and must get the suffixes
+        joined = merge(self.df, self.df2, on="key2", suffixes=(".foo", ".bar"))
+
+        assert "key1.foo" in joined
+        assert "key1.bar" in joined
+
+    def test_handle_overlap_arbitrary_key(self):  # differently named join keys: suffixed columns are expected
+        joined = merge(
+            self.df,
+            self.df2,
+            left_on="key2",
+            right_on="key1",
+            suffixes=(".foo", ".bar"),
+        )
+        assert "key1.foo" in joined  # left frame's key1
+        assert "key2.bar" in joined  # right frame's key2
+
+    def test_join_on(self):  # DataFrame.join(on=...) against the other frame's index, several scenarios
+        target = self.target
+        source = self.source
+
+        merged = target.join(source, on="C")
+        tm.assert_series_equal(merged["MergedA"], target["A"], check_names=False)
+        tm.assert_series_equal(merged["MergedD"], target["D"], check_names=False)
+
+        # join with duplicates (fix regression from DataFrame/Matrix merge)
+        df = DataFrame({"key": ["a", "a", "b", "b", "c"]})
+        df2 = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"])
+        joined = df.join(df2, on="key")
+        expected = DataFrame(
+            {"key": ["a", "a", "b", "b", "c"], "value": [0, 0, 1, 1, 2]}
+        )
+        tm.assert_frame_equal(joined, expected)
+
+        # Test when some are missing
+        df_a = DataFrame([[1], [2], [3]], index=["a", "b", "c"], columns=["one"])
+        df_b = DataFrame([["foo"], ["bar"]], index=[1, 2], columns=["two"])
+        df_c = DataFrame([[1], [2]], index=[1, 2], columns=["three"])
+        joined = df_a.join(df_b, on="one")
+        joined = joined.join(df_c, on="one")
+        assert np.isnan(joined["two"]["c"])  # one == 3 has no match in df_b / df_c
+        assert np.isnan(joined["three"]["c"])
+
+        # merge column not present
+        with pytest.raises(KeyError, match="^'E'$"):
+            target.join(source, on="E")
+
+        # overlap
+        source_copy = source.copy()
+        source_copy["A"] = 0
+        msg = (
+            "You are trying to merge on float64 and object columns. If "
+            "you wish to proceed you should use pd.concat"
+        )
+        with pytest.raises(ValueError, match=msg):
+            target.join(source_copy, on="A")
+
+    def test_join_on_fails_with_different_right_index(self):  # one left_on key vs the index built for df2
+        df = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}
+        )
+        df2 = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)},
+            index=tm.makeCustomIndex(10, 2),
+        )
+        msg = r'len\(left_on\) must equal the number of levels in the index of "right"'
+        with pytest.raises(ValueError, match=msg):
+            merge(df, df2, left_on="a", right_index=True)
+
+    def test_join_on_fails_with_different_left_index(self):  # mirror case: one right_on key vs df's index
+        df = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)},
+            index=tm.makeCustomIndex(3, 2),
+        )
+        df2 = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}
+        )
+        msg = r'len\(right_on\) must equal the number of levels in the index of "left"'
+        with pytest.raises(ValueError, match=msg):
+            merge(df, df2, right_on="b", left_index=True)
+
+    def test_join_on_fails_with_different_column_counts(self):  # one right_on key vs two left_on keys
+        df = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}
+        )
+        df2 = DataFrame(
+            {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)},
+            index=tm.makeCustomIndex(10, 2),
+        )
+        msg = r"len\(right_on\) must equal len\(left_on\)"
+        with pytest.raises(ValueError, match=msg):
+            merge(df, df2, right_on="a", left_on=["a", "b"])
+
+    @pytest.mark.parametrize("wrong_type", [2, "str", None, np.array([0, 1])])
+    def test_join_on_fails_with_wrong_object_type(self, wrong_type):
+        # GH12081 - original issue
+
+        # GH21220 - merging of Series and DataFrame is now allowed
+        # Edited test to remove the Series object from test parameters
+
+        df = DataFrame({"a": [1, 1]})
+        msg = (
+            "Can only merge Series or DataFrame objects, "
+            f"a {type(wrong_type)} was passed"
+        )
+        with pytest.raises(TypeError, match=msg):  # invalid object as the left operand
+            merge(wrong_type, df, left_on="a", right_on="a")
+        with pytest.raises(TypeError, match=msg):  # invalid object as the right operand
+            merge(df, wrong_type, left_on="a", right_on="a")
+
+    def test_join_on_pass_vector(self):  # `on` given as a Series of values instead of a column name
+        expected = self.target.join(self.source, on="C")
+        del expected["C"]  # the vector-based join below runs on a target without column "C"
+
+        join_col = self.target.pop("C")  # removes "C" from self.target and keeps it as a Series
+        result = self.target.join(self.source, on=join_col)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_with_len0(self):
+        # nothing to merge: the right side is reindexed to an empty index
+        merged = self.target.join(self.source.reindex([]), on="C")
+        for col in self.source:
+            assert col in merged
+            assert merged[col].isna().all()  # every joined column is entirely missing
+
+        merged2 = self.target.join(self.source.reindex([]), on="C", how="inner")  # inner: no rows expected
+        tm.assert_index_equal(merged2.columns, merged.columns)
+        assert len(merged2) == 0
+
+    def test_join_on_inner(self):
+        """Compare an inner join on a column with the filtered left join."""
+        keys = DataFrame({"key": ["a", "a", "d", "b", "b", "c"]})
+        lookup = DataFrame({"value": [0, 1]}, index=["a", "b"])
+
+        joined = keys.join(lookup, on="key", how="inner")
+
+        # Reference result: the default join with the unmatched rows removed.
+        left_joined = keys.join(lookup, on="key")
+        expected = left_joined[left_joined["value"].notna()]
+
+        tm.assert_series_equal(joined["key"], expected["key"])
+        # dtypes are deliberately not compared for the value column
+        tm.assert_series_equal(joined["value"], expected["value"], check_dtype=False)
+        tm.assert_index_equal(joined.index, expected.index)
+
+    def test_join_on_singlekey_list(self):
+        """``on=["key"]`` must give the same result as ``on="key"``."""
+        left = DataFrame({"key": ["a", "a", "b", "b", "c"]})
+        right = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"])
+
+        # corner cases
+        via_list = left.join(right, on=["key"])
+        via_scalar = left.join(right, on="key")
+
+        tm.assert_frame_equal(via_list, via_scalar)
+
+    def test_join_on_series(self):
+        """Joining a single column as a Series equals joining it as a frame."""
+        as_series = self.source["MergedA"]
+        as_frame = self.source[["MergedA"]]
+
+        result = self.target.join(as_series, on="C")
+        expected = self.target.join(as_frame, on="C")
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_on_series_buglet(self):
+        """Join a one-element named Series onto a column with repeated keys."""
+        # GH #638
+        frame = DataFrame({"a": [1, 1]})
+        named = Series([2], index=[1], name="b")
+
+        result = frame.join(named, on="a")
+
+        expected = DataFrame({"a": [1, 1], "b": [2, 2]}, index=frame.index)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_index_mixed(self, join_type):
+        """Compare index joins with ``_join_by_hand`` in both directions."""
+        # no overlapping blocks
+        bool_str = DataFrame(index=np.arange(10))
+        bool_str["bool"] = True
+        bool_str["string"] = "foo"
+
+        int_float = DataFrame(index=np.arange(5, 15))
+        int_float["int"] = 1
+        int_float["float"] = 1.0
+
+        # Exercise the join with each frame on the left-hand side in turn.
+        for left, right in ((bool_str, int_float), (int_float, bool_str)):
+            joined = left.join(right, how=join_type)
+            expected = _join_by_hand(left, right, how=join_type)
+            tm.assert_frame_equal(joined, expected)
+
+    def test_join_index_mixed_overlap(self):
+        """Join two frames with identical column names using both suffixes."""
+        labels = ["A", "B", "C", "D"]
+        values = {"A": 1.0, "B": 2, "C": "foo", "D": True}
+
+        df1 = DataFrame(values, index=np.arange(10), columns=labels)
+        assert df1["B"].dtype == np.int64
+        assert df1["D"].dtype == np.bool_
+
+        df2 = DataFrame(values, index=np.arange(0, 10, 2), columns=labels)
+
+        # overlap
+        joined = df1.join(df2, lsuffix="_one", rsuffix="_two")
+
+        # Give the inputs the suffixed names before building the reference
+        # result with _join_by_hand.
+        df1.columns = [f"{label}_one" for label in labels]
+        df2.columns = [f"{label}_two" for label in labels]
+        expected = _join_by_hand(df1, df2)
+        tm.assert_frame_equal(joined, expected)
+
+    def test_join_empty_bug(self):
+        """Smoke test: outer-joining onto an empty frame must not raise."""
+        # generated an exception in 0.4.3
+        empty = DataFrame()
+        other = DataFrame([3], index=[0], columns=["A"])
+        empty.join(other, how="outer")
+
+    def test_join_unconsolidated(self):
+        """Smoke test: join a frame that had a column added after construction."""
+        # GH #331
+        base = DataFrame(np.random.randn(30, 2), columns=["a", "b"])
+        base["c"] = Series(np.random.randn(30))
+        other = DataFrame(np.random.randn(30, 1), columns=["q"])
+
+        # it works!
+        base.join(other)
+        other.join(base)
+
+    def test_join_multiindex(self):
+        """Outer-join two MultiIndexed frames, sorted by level 0 and then level 1."""
+        names = ["first", "second"]
+        index1 = MultiIndex.from_arrays(
+            [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 1, 2, 3]], names=names
+        )
+        index2 = MultiIndex.from_arrays(
+            [["b", "b", "b", "c", "c", "c"], [1, 2, 3, 1, 2, 3]], names=names
+        )
+
+        df1 = DataFrame(data=np.random.randn(6), index=index1, columns=["var X"])
+        df2 = DataFrame(data=np.random.randn(6), index=index2, columns=["var Y"])
+
+        # Union of both indexes, built from their raw values.
+        ex_index = Index(index1.values).union(Index(index2.values))
+
+        def build_expected(left, right):
+            # Reindex both sides onto the union, join, then restore level names.
+            frame = left.reindex(ex_index).join(right.reindex(ex_index))
+            frame.index.names = index1.names
+            return frame
+
+        df1 = df1.sort_index(level=0)
+        df2 = df2.sort_index(level=0)
+
+        joined = df1.join(df2, how="outer")
+        tm.assert_frame_equal(joined, build_expected(df1, df2))
+        assert joined.index.names == index1.names
+
+        df1 = df1.sort_index(level=1)
+        df2 = df2.sort_index(level=1)
+
+        joined = df1.join(df2, how="outer").sort_index(level=0)
+        tm.assert_frame_equal(joined, build_expected(df1, df2))
+        assert joined.index.names == index1.names
+
+    def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex):
+        """Inner-join two key columns onto a two-level MultiIndex.
+
+        The result of ``DataFrame.join`` is compared with two equivalent
+        ``merge`` calls: one using the right frame's index as the key, and one
+        using the index levels turned into ``first``/``second`` columns.
+        """
+        key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"]
+        key2 = [
+            "two",
+            "one",
+            "three",
+            "one",
+            "two",
+            "one",
+            "two",
+            "two",
+            "three",
+            "one",
+        ]
+
+        data = np.random.randn(len(key1))
+        data = DataFrame({"key1": key1, "key2": key2, "data": data})
+
+        index = lexsorted_two_level_string_multiindex
+        to_join = DataFrame(
+            np.random.randn(10, 3), index=index, columns=["j_one", "j_two", "j_three"]
+        )
+
+        joined = data.join(to_join, on=["key1", "key2"], how="inner")
+
+        # Reference 1: merge with the MultiIndexed frame on the left.
+        expected2 = merge(
+            to_join,
+            data,
+            right_on=["key1", "key2"],
+            left_index=True,
+            how="inner",
+            sort=False,
+        )
+        tm.assert_frame_equal(joined, expected2.reindex_like(joined))
+
+        # Reference 2: merge on columns after resetting the index levels.
+        expected = merge(
+            data,
+            to_join.reset_index(),
+            left_on=["key1", "key2"],
+            right_on=["first", "second"],
+            how="inner",
+            sort=False,
+        )
+        expected = expected.drop(["first", "second"], axis=1)
+        expected.index = joined.index
+
+        # is_monotonic_increasing is the non-deprecated spelling of is_monotonic.
+        assert joined.index.is_monotonic_increasing
+        tm.assert_frame_equal(joined, expected)
+
+    def test_join_hierarchical_mixed(self):
+        """Merge a frame with two-level columns and one with flat columns."""
+        # GH 2024
+        df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"])
+        aggregated = df.groupby(["a"]).agg({"b": [np.mean, np.sum]})
+        flat = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"])
+        flat = flat.set_index("a")
+        # GH 9455, 12219
+        msg = "merging between different levels is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = merge(aggregated, flat, left_index=True, right_index=True)
+        # Both the tuple label and the plain label must be present.
+        assert ("b", "mean") in result
+        assert "b" in result
+
+    def test_join_float64_float32(self):
+        """Check column dtypes after joining/merging float64 and float32 data."""
+        wide = DataFrame(
+            np.random.randn(10, 2), columns=["a", "b"], dtype=np.float64
+        )
+        narrow = DataFrame(np.random.randn(10, 1), columns=["c"], dtype=np.float32)
+        joined = wide.join(narrow)
+        for column, dtype in (("a", "float64"), ("b", "float64"), ("c", "float32")):
+            assert joined.dtypes[column] == dtype
+
+        ints = np.random.randint(0, 5, 100).astype("int64")
+        doubles = np.random.random(100).astype("float64")
+        singles = np.random.random(100).astype("float32")
+        df = DataFrame({"a": ints, "b": doubles, "c": singles})
+        xpdf = DataFrame({"a": ints, "b": doubles, "c": singles})
+        s = DataFrame(np.random.random(5).astype("float32"), columns=["md"])
+
+        rs = df.merge(s, left_on="a", right_index=True)
+        merged_dtypes = (
+            ("a", "int64"),
+            ("b", "float64"),
+            ("c", "float32"),
+            ("md", "float32"),
+        )
+        for column, dtype in merged_dtypes:
+            assert rs.dtypes[column] == dtype
+
+        xp = xpdf.merge(s, left_on="a", right_index=True)
+        tm.assert_frame_equal(rs, xp)
+
+    def test_join_many_non_unique_index(self):
+        """Compare multi-frame joins on non-unique indexes with chained merges."""
+        keys = ["a", "b"]
+
+        # Outer join of three frames sharing a non-unique two-level index.
+        df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]})
+        df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]})
+        df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]})
+        idf1, idf2, idf3 = (frame.set_index(keys) for frame in (df1, df2, df3))
+
+        result = idf1.join([idf2, idf3], how="outer").reset_index()
+
+        partial = merge(df1, df2, on=keys, how="outer")
+        expected = merge(partial, df3, on=keys, how="outer")
+        expected = expected[result.columns]
+        for key in keys:
+            expected[key] = expected[key].astype("int64")
+        tm.assert_frame_equal(result, expected)
+
+        # Same comparison for an inner join with three rows per frame.
+        df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]})
+        df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]})
+        df3 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]})
+        idf1, idf2, idf3 = (frame.set_index(keys) for frame in (df1, df2, df3))
+
+        result = idf1.join([idf2, idf3], how="inner").reset_index()
+
+        partial = merge(df1, df2, on=keys, how="inner")
+        expected = merge(partial, df3, on=keys, how="inner")
+        tm.assert_frame_equal(result, expected.loc[:, result.columns])
+
+        # GH 11519
+        df = DataFrame(
+            {
+                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
+                "C": np.random.randn(8),
+                "D": np.random.randn(8),
+            }
+        )
+        doubled = np.repeat(np.arange(8), 2)
+        s = Series(doubled, index=np.repeat(np.arange(8), 2), name="TEST")
+
+        # Every join type must give the same frame as the inner join.
+        inner = df.join(s, how="inner")
+        others = [df.join(s, how=how) for how in ("outer", "left", "right")]
+        for other in others:
+            tm.assert_frame_equal(inner, other)
+
+    def test_join_sort(self):
+        """Check row order of a keyed join with ``sort=True`` and ``sort=False``."""
+        left = DataFrame({"key": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 4]})
+        right = DataFrame({"value2": ["a", "b", "c"]}, index=["bar", "baz", "foo"])
+
+        sorted_join = left.join(right, on="key", sort=True)
+        expected_data = {
+            "key": ["bar", "baz", "foo", "foo"],
+            "value": [2, 3, 1, 4],
+            "value2": ["a", "b", "c", "c"],
+        }
+        expected = DataFrame(expected_data, index=[1, 2, 0, 3])
+        tm.assert_frame_equal(sorted_join, expected)
+
+        # smoke test
+        unsorted_join = left.join(right, on="key", sort=False)
+        tm.assert_index_equal(unsorted_join.index, Index(range(4)), exact=True)
+
+    def test_join_mixed_non_unique_index(self):
+        """Join frames whose indexes mix ints and strings and contain duplicates."""
+        # GH 12814, unorderable types in py3 with a non-unique index
+
+        # Duplicates on the right-hand side.
+        left = DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 3, "a"])
+        right = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 3, 3, 4])
+        expected = DataFrame(
+            {"a": [1, 2, 3, 3, 4], "b": [5, np.nan, 6, 7, np.nan]},
+            index=[1, 2, 3, 3, "a"],
+        )
+        tm.assert_frame_equal(left.join(right), expected)
+
+        # Duplicates on the left-hand side.
+        left = DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 2, "a"])
+        right = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 2, 3, 4])
+        expected = DataFrame(
+            {"a": [1, 2, 3, 4], "b": [5, 6, 6, np.nan]}, index=[1, 2, 2, "a"]
+        )
+        tm.assert_frame_equal(left.join(right), expected)
+
+    def test_join_non_unique_period_index(self):
+        """Inner-join a PeriodIndex frame with a copy that repeats every period."""
+        # GH #16871
+        index = pd.period_range("2016-01-01", periods=16, freq="M")
+        df = DataFrame(list(range(len(index))), index=index, columns=["pnum"])
+        doubled = concat([df, df])
+
+        result = df.join(doubled, how="inner", rsuffix="_df2")
+
+        # Each of the 16 values appears twice, identically in both columns.
+        repeated = np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1)
+        expected = DataFrame(
+            np.tile(repeated, 2),
+            columns=["pnum", "pnum_df2"],
+            index=doubled.sort_index().index,
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_mixed_type_join_with_suffix(self):
+        """Smoke test: join groupby mean and count results using ``rsuffix``."""
+        # GH #916
+        df = DataFrame(np.random.randn(20, 6), columns=list("abcdef"))
+        df.insert(0, "id", 0)
+        df.insert(5, "dt", "foo")
+
+        by_id = df.groupby("id")
+        means = by_id.mean()
+        counts = by_id.count()
+
+        # it works!
+        means.join(counts, rsuffix="_right")
+
+    def test_join_many(self):
+        """Join a list of frames on the index for each ``how``; ``on`` must raise."""
+        df = DataFrame(np.random.randn(10, 6), columns=list("abcdef"))
+        pieces = [df[cols] for cols in (["a", "b"], ["c", "d"], ["e", "f"])]
+
+        joined = pieces[0].join(pieces[1:])
+        tm.assert_frame_equal(joined, df)
+
+        # Slice each piece differently so the indexes only partly overlap.
+        first, second, third = pieces
+        pieces = [first[:-2], second[2:], third[1:9]]
+
+        def _check_diff_index(df_list, result, exp_index):
+            head, *rest = [x.reindex(exp_index) for x in df_list]
+            tm.assert_frame_equal(result, head.join(rest))
+
+        # different join types
+        cases = [
+            ({"how": "outer"}, df.index),
+            ({}, pieces[0].index),
+            ({"how": "inner"}, df.index[2:8]),
+        ]
+        for join_kwargs, exp_index in cases:
+            joined = pieces[0].join(pieces[1:], **join_kwargs)
+            _check_diff_index(pieces, joined, exp_index)
+
+        msg = "Joining multiple DataFrames only supported for joining on index"
+        with pytest.raises(ValueError, match=msg):
+            pieces[0].join(pieces[1:], on="a")
+
+    def test_join_many_mixed(self):
+        """Re-joining column slices of a mixed-dtype frame restores the frame."""
+        df = DataFrame(np.random.randn(8, 4), columns=["A", "B", "C", "D"])
+        df["key"] = ["foo", "bar"] * 4
+
+        first, *others = (
+            df.loc[:, cols] for cols in (["A", "B"], ["C", "D"], ["key"])
+        )
+
+        result = first.join(others)
+        tm.assert_frame_equal(result, df)
+
+    def test_join_dups(self):
+        """Join and merge frames that carry duplicate column labels."""
+        # joining dups
+        floats = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])
+        ints = DataFrame(
+            np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
+        )
+        df = concat([floats, ints], axis=1)
+
+        expected = concat([df, df], axis=1)
+        result = df.join(df, rsuffix="_2")
+        result.columns = expected.columns
+        tm.assert_frame_equal(result, expected)
+
+        # GH 4975, invalid join on dups
+        w, x, y, z = (
+            DataFrame(np.random.randn(4, 2), columns=["x", "y"]) for _ in range(4)
+        )
+        on_index = {"left_index": True, "right_index": True}
+
+        dta = x.merge(y, **on_index).merge(z, how="outer", **on_index)
+        with tm.assert_produces_warning(FutureWarning):
+            dta = dta.merge(w, **on_index)
+        expected = concat([x, y, z, w], axis=1)
+        expected.columns = ["x_x", "y_x", "x_y", "y_y"] * 2
+        tm.assert_frame_equal(dta, expected)
+
+    def test_join_multi_to_multi(self, join_type):
+        """Join a three-level frame to a two-level frame on the shared levels."""
+        # GH 20475
+        shared = ["abc", "xy"]
+
+        leftindex = MultiIndex.from_product(
+            [list("abc"), list("xy"), [1, 2]], names=["abc", "xy", "num"]
+        )
+        left = DataFrame({"v1": range(12)}, index=leftindex)
+
+        rightindex = MultiIndex.from_product([list("abc"), list("xy")], names=shared)
+        right = DataFrame({"v2": [100 * i for i in range(1, 7)]}, index=rightindex)
+
+        result = left.join(right, on=shared, how=join_type)
+
+        # Reference result: merge on plain columns, then restore the left index.
+        merged = left.reset_index().merge(
+            right.reset_index(), on=shared, how=join_type
+        )
+        expected = merged.set_index(["abc", "xy", "num"])
+        tm.assert_frame_equal(expected, result)
+
+        msg = r'len\(left_on\) must equal the number of levels in the index of "right"'
+        with pytest.raises(ValueError, match=msg):
+            left.join(right, on="xy", how=join_type)
+
+        with pytest.raises(ValueError, match=msg):
+            right.join(left, on=shared, how=join_type)
+
+    def test_join_on_tz_aware_datetimeindex(self):
+        """Join a tz-aware date column onto a tz-aware DatetimeIndex."""
+        # GH 23931, 26335
+
+        def chicago_days(start):
+            # Five consecutive days in the America/Chicago timezone.
+            return pd.date_range(start=start, periods=5, tz="America/Chicago")
+
+        df1 = DataFrame({"date": chicago_days("2018-01-01"), "vals": list("abcde")})
+        df2 = DataFrame({"date": chicago_days("2018-01-03"), "vals_2": list("tuvwx")})
+
+        result = df1.join(df2.set_index("date"), on="date")
+
+        # The first two dates of df1 have no partner in df2.
+        expected = df1.copy()
+        expected["vals_2"] = Series([np.nan, np.nan, "t", "u", "v"], dtype=object)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_datetime_string(self):
+        """Join on a (datetime, string) key pair against a two-level index."""
+        # GH 5647
+        keys = ["x", "y"]
+
+        dfa_rows = [
+            ["2012-08-02", "L", 10],
+            ["2012-08-02", "J", 15],
+            ["2013-04-06", "L", 20],
+            ["2013-04-06", "J", 25],
+        ]
+        dfa = DataFrame(dfa_rows, columns=["x", "y", "a"])
+        dfa["x"] = pd.to_datetime(dfa["x"])
+
+        dfb_rows = [["2012-08-02", "J", 1], ["2013-04-06", "L", 2]]
+        dfb = DataFrame(dfb_rows, columns=["x", "y", "z"], index=[2, 4])
+        dfb["x"] = pd.to_datetime(dfb["x"])
+
+        result = dfb.join(dfa.set_index(keys), on=keys)
+
+        expected_rows = [
+            [Timestamp("2012-08-02 00:00:00"), "J", 1, 15],
+            [Timestamp("2013-04-06 00:00:00"), "L", 2, 20],
+        ]
+        expected = DataFrame(
+            expected_rows, index=[2, 4], columns=["x", "y", "z", "a"]
+        )
+        tm.assert_frame_equal(result, expected)
+
+
+def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix="_y"):
+    """
+    Validate a join result group by group against both input frames.
+
+    For every key group of ``result``, the columns belonging to each input
+    are compared with that input's group for the same key.  A key missing
+    from an input is an error when ``how`` requires that input to match
+    ("left"/"inner" for ``left``, "right"/"inner" for ``right``); otherwise
+    that input's non-key columns must be all-NA in the result.
+    """
+    # some smoke tests
+    for key_col in join_col:
+        assert result[key_col].notna().all()
+
+    # (input frame, its groups, its suffix, join types that forbid extra keys)
+    sides = [
+        (left, left.groupby(join_col), lsuffix, ("left", "inner")),
+        (right, right.groupby(join_col), rsuffix, ("right", "inner")),
+    ]
+
+    for group_key, group in result.groupby(join_col):
+        # Slice out both sides first, then validate left before right.
+        chunks = [
+            _restrict_to_columns(group, frame.columns, suffix)
+            for frame, _, suffix, _ in sides
+        ]
+
+        for chunk, (frame, grouped, _, strict_hows) in zip(chunks, sides):
+            try:
+                source_group = grouped.get_group(group_key)
+            except KeyError as err:
+                if how in strict_hows:
+                    raise AssertionError(
+                        f"key {group_key} should not have been in the join"
+                    ) from err
+
+                _assert_all_na(chunk, frame.columns, join_col)
+            else:
+                _assert_same_contents(chunk, source_group)
+
+
+def _restrict_to_columns(group, columns, suffix):
+    """
+    Select from ``group`` the columns that correspond to ``columns``.
+
+    A column of ``group`` is kept when its label is in ``columns`` or when
+    the label with a trailing ``suffix`` removed is in ``columns``.  Kept
+    columns are renamed to their un-suffixed label and returned in the
+    order given by ``columns``.
+
+    Parameters
+    ----------
+    group : DataFrame
+        Chunk of a join result.
+    columns : Index or list-like
+        Column labels of one of the join inputs.
+    suffix : str
+        Suffix the join appended to that input's overlapping columns.
+
+    Returns
+    -------
+    DataFrame
+    """
+
+    def _strip_suffix(label):
+        # Only a trailing suffix is removed.  An empty suffix or a
+        # non-string label leaves the label untouched.
+        if suffix and isinstance(label, str) and label.endswith(suffix):
+            return label[: len(label) - len(suffix)]
+        return label
+
+    # Map each wanted label of ``group`` to the name it has in ``columns``.
+    # Labels already present in ``columns`` are never rewritten, even if
+    # they happen to contain the suffix text.
+    renames = {}
+    for label in group.columns:
+        if label in columns:
+            renames[label] = label
+        elif _strip_suffix(label) in columns:
+            renames[label] = _strip_suffix(label)
+
+    # filter
+    group = group.loc[:, list(renames)]
+
+    # get rid of suffixes, if any
+    group = group.rename(columns=renames)
+
+    # put in the right order...
+    return group.loc[:, columns]
+
+
+def _assert_same_contents(join_chunk, source):
+    """
+    Assert that ``join_chunk`` and ``source`` hold the same distinct rows.
+
+    Missing values are replaced by a sentinel before de-duplicating.  The
+    number of distinct rows in ``join_chunk`` must equal ``len(source)`` and
+    every distinct row of ``source`` must occur in ``join_chunk``.
+    """
+    NA_SENTINEL = -1234567  # drop_duplicates not so NA-friendly...
+
+    def _distinct_rows(frame):
+        return frame.fillna(NA_SENTINEL).drop_duplicates().values
+
+    joined_values = _distinct_rows(join_chunk)
+    source_values = _distinct_rows(source)
+
+    joined_rows = {tuple(row) for row in joined_values}
+    assert len(joined_rows) == len(source)
+    for row in source_values:
+        assert tuple(row) in joined_rows
+
+
+def _assert_all_na(join_chunk, source_columns, join_col):
+    """Assert every column of ``source_columns`` outside ``join_col`` is all-NA."""
+    value_columns = [c for c in source_columns if c not in join_col]
+    for c in value_columns:
+        assert join_chunk[c].isna().all()
+
+
+def _join_by_hand(a, b, how="left"):
+    """
+    Build the reference result of joining ``a`` and ``b`` on their indexes.
+
+    Both frames are reindexed onto the joined index, the columns of ``b``
+    are assigned onto the reindexed ``a`` one by one, and the result is
+    returned with the columns of ``a`` followed by those of ``b``.
+    """
+    join_index = a.index.join(b.index, how=how)
+
+    combined = a.reindex(join_index)
+    right_aligned = b.reindex(join_index)
+
+    for col, values in right_aligned.items():
+        combined[col] = values
+
+    return combined.reindex(columns=a.columns.append(b.columns))
+
+
+def test_join_inner_multiindex_deterministic_order():
+    """Inner-join two MultiIndexed frames that share only the ``b`` level."""
+    # GH: 36910
+    left_index = MultiIndex.from_tuples([(1, 2, 4)], names=("a", "b", "d"))
+    left = DataFrame(data={"e": 5}, index=left_index)
+
+    right_index = MultiIndex.from_tuples([(2, 3)], names=("b", "c"))
+    right = DataFrame(data={"f": 6}, index=right_index)
+
+    result = left.join(right, how="inner")
+
+    expected_index = MultiIndex.from_tuples(
+        [(2, 1, 4, 3)], names=("b", "a", "d", "c")
+    )
+    expected = DataFrame({"e": [5], "f": [6]}, index=expected_index)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    ("input_col", "output_cols"), [("b", ["a", "b"]), ("a", ["a_x", "a_y"])]
+)
+def test_join_cross(input_col, output_cols):
+    """Cross-join two single-column frames, with and without a name clash."""
+    # GH#5401
+    left = DataFrame({"a": [1, 3]})
+    right = DataFrame({input_col: [3, 4]})
+
+    result = left.join(right, how="cross", lsuffix="_x", rsuffix="_y")
+
+    left_name, right_name = output_cols
+    expected = DataFrame({left_name: [1, 1, 3, 3], right_name: [3, 4, 3, 4]})
+    tm.assert_frame_equal(result, expected)
+
+
+def test_join_multiindex_one_level(join_type):
+    """Join a two-level frame with a frame whose MultiIndex has a single level."""
+    # GH#36909
+    left_index = MultiIndex.from_tuples([(1, 2)], names=("a", "b"))
+    left = DataFrame(data={"c": 3}, index=left_index)
+
+    right_index = MultiIndex.from_tuples([(2,)], names=("b",))
+    right = DataFrame(data={"d": 4}, index=right_index)
+
+    result = left.join(right, how=join_type)
+
+    expected_index = MultiIndex.from_tuples([(2, 1)], names=["b", "a"])
+    expected = DataFrame({"c": [3], "d": [4]}, index=expected_index)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "categories, values",
+    [
+        (["Y", "X"], ["Y", "X", "X"]),
+        ([2, 1], [2, 1, 1]),
+        ([2.5, 1.5], [2.5, 1.5, 1.5]),
+        (
+            [Timestamp("2020-12-31"), Timestamp("2019-12-31")],
+            [Timestamp("2020-12-31"), Timestamp("2019-12-31"), Timestamp("2019-12-31")],
+        ),
+    ],
+)
+def test_join_multiindex_not_alphabetical_categorical(categories, values):
+    """Join frames whose second index level is a Categorical in given order."""
+    # GH#38502
+    index_cols = ["first", "second"]
+
+    left_data = {
+        "first": ["A", "A"],
+        "second": Categorical(categories, categories=categories),
+        "value": [1, 2],
+    }
+    left = DataFrame(left_data).set_index(index_cols)
+
+    right_data = {
+        "first": ["A", "A", "B"],
+        "second": Categorical(values, categories=categories),
+        "value": [3, 4, 5],
+    }
+    right = DataFrame(right_data).set_index(index_cols)
+
+    result = left.join(right, lsuffix="_left", rsuffix="_right")
+
+    expected_data = {
+        "first": ["A", "A"],
+        "second": Categorical(categories, categories=categories),
+        "value_left": [1, 2],
+        "value_right": [3, 4],
+    }
+    expected = DataFrame(expected_data).set_index(index_cols)
+    tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/merge/test_merge.py
+++ b/dist/client/pandas/tests/reshape/merge/test_merge.py
--- a/dist/client/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/dist/client/pandas/tests/reshape/merge/test_merge_asof.py
--- a/dist/client/pandas/tests/reshape/merge/test_merge_cross.py
+++ b/dist/client/pandas/tests/reshape/merge/test_merge_cross.py
@@ -0,0 +1,98 @@
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.core.reshape.merge import (
+    MergeError,
+    merge,
+)
+
+
+@pytest.mark.parametrize(
+    ("input_col", "output_cols"), [("b", ["a", "b"]), ("a", ["a_x", "a_y"])]
+)
+def test_merge_cross(input_col, output_cols):
+    """Cross-merge two single-column frames and check the inputs are unchanged."""
+    # GH#5401
+    left = DataFrame({"a": [1, 3]})
+    right = DataFrame({input_col: [3, 4]})
+    # Snapshots to compare the inputs against after the merge.
+    left_before, right_before = left.copy(), right.copy()
+
+    result = merge(left, right, how="cross")
+
+    left_name, right_name = output_cols
+    expected = DataFrame({left_name: [1, 1, 3, 3], right_name: [3, 4, 3, 4]})
+    tm.assert_frame_equal(result, expected)
+    tm.assert_frame_equal(left, left_before)
+    tm.assert_frame_equal(right, right_before)
+
+
+@pytest.mark.parametrize(
+    "kwargs",
+    [
+        {"left_index": True},
+        {"right_index": True},
+        {"on": "a"},
+        {"left_on": "a"},
+        {"right_on": "b"},
+    ],
+)
+def test_merge_cross_error_reporting(kwargs):
+    """A cross merge must raise MergeError when any key argument is given."""
+    # GH#5401
+    left = DataFrame({"a": [1, 3]})
+    right = DataFrame({"b": [3, 4]})
+    expected_error = (
+        "Can not pass on, right_on, left_on or set right_index=True or "
+        "left_index=True"
+    )
+    with pytest.raises(MergeError, match=expected_error):
+        merge(left, right, how="cross", **kwargs)
+
+
+def test_merge_cross_mixed_dtypes():
+    """Cross-merge a string column with an integer column."""
+    # GH#5401
+    letters = DataFrame(["a", "b", "c"], columns=["A"])
+    numbers = DataFrame(range(2), columns=["B"])
+
+    result = merge(letters, numbers, how="cross")
+
+    expected_data = {"A": ["a", "a", "b", "b", "c", "c"], "B": [0, 1, 0, 1, 0, 1]}
+    tm.assert_frame_equal(result, DataFrame(expected_data))
+
+
+def test_merge_cross_more_than_one_column():
+    """Cross-merge two frames that each have two columns."""
+    # GH#5401
+    left = DataFrame({"A": list("ab"), "B": [2, 1]})
+    right = DataFrame({"C": range(2), "D": range(4, 6)})
+
+    result = merge(left, right, how="cross")
+
+    expected_data = {
+        "A": ["a", "a", "b", "b"],
+        "B": [2, 2, 1, 1],
+        "C": [0, 1, 0, 1],
+        "D": [4, 5, 4, 5],
+    }
+    tm.assert_frame_equal(result, DataFrame(expected_data))
+
+
+def test_merge_cross_null_values(nulls_fixture):
+    """Cross-merge a frame whose column contains the parametrized null value."""
+    # GH#5401
+    left = DataFrame({"a": [1, nulls_fixture]})
+    right = DataFrame({"b": ["a", "b"], "c": [1.0, 2.0]})
+
+    result = merge(left, right, how="cross")
+
+    expected_data = {
+        "a": [1, 1, nulls_fixture, nulls_fixture],
+        "b": ["a", "b", "a", "b"],
+        "c": [1.0, 2.0, 1.0, 2.0],
+    }
+    tm.assert_frame_equal(result, DataFrame(expected_data))
+
+
+def test_join_cross_error_reporting():
+    """``DataFrame.join`` with ``how="cross"`` and ``on`` must raise MergeError."""
+    # GH#5401
+    left = DataFrame({"a": [1, 3]})
+    right = DataFrame({"a": [3, 4]})
+    expected_error = (
+        "Can not pass on, right_on, left_on or set right_index=True or "
+        "left_index=True"
+    )
+    with pytest.raises(MergeError, match=expected_error):
+        left.join(right, how="cross", on="a")
--- a/dist/client/pandas/tests/reshape/merge/test_merge_index_as_string.py
+++ b/dist/client/pandas/tests/reshape/merge/test_merge_index_as_string.py
@@ -0,0 +1,189 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+
+
+@pytest.fixture
+def df1():
+    """Return an 11-row frame with ``outer``, ``inner`` and ``v1`` columns."""
+    data = {
+        "outer": [1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4],
+        "inner": [1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2],
+        "v1": np.linspace(0, 1, 11),
+    }
+    return DataFrame(data)
+
+
+@pytest.fixture
+def df2():
+    """Return a 12-row frame with ``outer``, ``inner`` and ``v2`` columns."""
+    data = {
+        "outer": [1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3],
+        "inner": [1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3],
+        "v2": np.linspace(10, 11, 12),
+    }
+    return DataFrame(data)
+
+
+@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
+def left_df(request, df1):
+    """Return ``df1`` with the parametrized columns moved into the index.
+
+    The parameter is a list of column names (empty, ``outer``, or ``outer``
+    and ``inner``); an empty list leaves ``df1`` unchanged.
+    """
+    levels = request.param
+    return df1.set_index(levels) if levels else df1
+
+
+@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
+def right_df(request, df2):
+    """Return ``df2`` with the parametrized columns moved into the index.
+
+    The parameter is a list of column names (empty, ``outer``, or ``outer``
+    and ``inner``); an empty list leaves ``df2`` unchanged.
+    """
+    levels = request.param
+    return df2.set_index(levels) if levels else df2
+
+
+def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, how=None):
+    """
+    Build the reference result for a merge on columns and/or index levels.
+
+    Named index levels that are not merge keys are dropped, named levels
+    that are merge keys are turned into columns, the two frames are merged
+    on plain columns, and finally the keys that were index levels on both
+    sides are set as the index of the result.
+
+    Parameters
+    ----------
+    df_left : DataFrame
+        The left DataFrame (may have zero or more named index levels)
+    df_right : DataFrame
+        The right DataFrame (may have zero or more named index levels)
+    on : list of str
+        The on parameter to the merge operation; when given it is used for
+        both left_on and right_on
+    left_on : list of str
+        The left_on parameter to the merge operation
+    right_on : list of str
+        The right_on parameter to the merge operation
+    how : str
+        The how parameter to the merge operation
+
+    Returns
+    -------
+    DataFrame
+        The expected merge result
+    """
+    # Handle on param if specified
+    if on is not None:
+        left_on = right_on = on
+
+    def _levels_to_columns(frame, keys):
+        # Return the frame's named levels and the frame with non-key levels
+        # dropped and key levels converted to columns.
+        levels = [name for name in frame.index.names if name is not None]
+
+        unused = [name for name in levels if name not in keys]
+        if unused:
+            frame = frame.reset_index(unused, drop=True)
+
+        used = [name for name in levels if name in keys]
+        if used:
+            frame = frame.reset_index(level=used)
+
+        return levels, frame
+
+    left_levels, df_left = _levels_to_columns(df_left, left_on)
+    right_levels, df_right = _levels_to_columns(df_right, right_on)
+
+    # Keys that were index levels on both sides form the output index
+    output_levels = [
+        key for key in left_on if key in right_levels and key in left_levels
+    ]
+
+    # Perform merge
+    expected = df_left.merge(df_right, left_on=left_on, right_on=right_on, how=how)
+
+    # Restore index levels
+    if not output_levels:
+        return expected
+    return expected.set_index(output_levels)
+
+
+@pytest.mark.parametrize(
+    "on,how",
+    [
+        (["outer"], "inner"),
+        (["inner"], "left"),
+        (["outer", "inner"], "right"),
+        (["inner", "outer"], "outer"),
+    ],
+)
+def test_merge_indexes_and_columns_on(left_df, right_df, on, how):
+    """Merge with ``on`` naming columns and/or index levels of either frame."""
+    # Perform merge
+    actual = left_df.merge(right_df, on=on, how=how)
+
+    # Construct expected result
+    reference = compute_expected(left_df, right_df, on=on, how=how)
+
+    tm.assert_frame_equal(actual, reference, check_like=True)
+
+
+@pytest.mark.parametrize(
+    "left_on,right_on,how",
+    [
+        (["outer"], ["outer"], "inner"),
+        (["inner"], ["inner"], "right"),
+        (["outer", "inner"], ["outer", "inner"], "left"),
+        (["inner", "outer"], ["inner", "outer"], "outer"),
+    ],
+)
+def test_merge_indexes_and_columns_lefton_righton(
+    left_df, right_df, left_on, right_on, how
+):
+    """Merge with ``left_on``/``right_on`` naming columns and/or index levels."""
+    merge_kwargs = {"left_on": left_on, "right_on": right_on, "how": how}
+
+    # Perform merge
+    actual = left_df.merge(right_df, **merge_kwargs)
+
+    # Construct expected result
+    reference = compute_expected(left_df, right_df, **merge_kwargs)
+
+    tm.assert_frame_equal(actual, reference, check_like=True)
+
+
+@pytest.mark.parametrize("left_index", ["inner", ["inner", "outer"]])
+def test_join_indexes_and_columns_on(df1, df2, left_index, join_type):
+    """Join with ``on`` keys that are index levels of the left frame."""
+    join_kwargs = {
+        "on": ["outer", "inner"],
+        "how": join_type,
+        "lsuffix": "_x",
+        "rsuffix": "_y",
+    }
+
+    # Construct left_df
+    left_df = df1.set_index(left_index)
+
+    # Construct right_df
+    right_df = df2.set_index(["outer", "inner"])
+
+    # Reference: join with all left keys as columns, then restore the index
+    flat_join = left_df.reset_index().join(right_df, **join_kwargs)
+    expected = flat_join.set_index(left_index)
+
+    # Perform join
+    result = left_df.join(right_df, **join_kwargs)
+
+    tm.assert_frame_equal(result, expected, check_like=True)
--- a/dist/client/pandas/tests/reshape/merge/test_merge_ordered.py
+++ b/dist/client/pandas/tests/reshape/merge/test_merge_ordered.py
@@ -0,0 +1,201 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    merge_ordered,
+)
+import pandas._testing as tm
+
+
+class TestMergeOrdered:
+    """Tests for ``merge_ordered`` built around two small frames keyed on ``key``."""
+
+    def setup_method(self, method):
+        # Fresh inputs before every test; the two key sets only share "c".
+        left_columns = {"key": ["a", "c", "e"], "lvalue": [1, 2.0, 3]}
+        right_columns = {"key": ["b", "c", "d", "f"], "rvalue": [1, 2, 3.0, 4]}
+
+        self.left = DataFrame(left_columns)
+        self.right = DataFrame(right_columns)
+
+    def test_basic(self):
+        """Without a fill method, keys present on one side only get NaN values."""
+        nan = np.nan
+        expected = DataFrame(
+            {
+                "key": list("abcdef"),
+                "lvalue": [1, nan, 2, nan, 3, nan],
+                "rvalue": [nan, 1, 2, 3, nan, 4],
+            }
+        )
+
+        result = merge_ordered(self.left, self.right, on="key")
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_ffill(self):
+        """``fill_method="ffill"`` carries the previous value into the gaps."""
+        expected = DataFrame(
+            {
+                "key": list("abcdef"),
+                "lvalue": [1.0, 1, 2, 2, 3, 3.0],
+                # nothing precedes the first row, so it stays NaN
+                "rvalue": [np.nan, 1, 2, 3, 3, 4],
+            }
+        )
+
+        result = merge_ordered(self.left, self.right, on="key", fill_method="ffill")
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_multigroup(self):
+        """``left_by``/``right_by`` run the ordered merge once per group."""
+        # Two stacked copies of the left frame, labelled as groups "a" and "b"
+        left = pd.concat([self.left] * 2, ignore_index=True)
+        left["group"] = list("aaabbb")
+
+        expected = DataFrame(
+            {
+                "key": list("abcdef") * 2,
+                "lvalue": [1.0, 1, 2, 2, 3, 3.0] * 2,
+                "rvalue": [np.nan, 1, 2, 3, 3, 4] * 2,
+                "group": ["a"] * 6 + ["b"] * 6,
+            }
+        )
+
+        result = merge_ordered(
+            left, self.right, on="key", left_by="group", fill_method="ffill"
+        )
+        # Align the column order with the result before comparing
+        tm.assert_frame_equal(result, expected.loc[:, result.columns])
+
+        # Swapping the operands and grouping on the right gives the same rows
+        swapped = merge_ordered(
+            self.right, left, on="key", right_by="group", fill_method="ffill"
+        )
+        tm.assert_frame_equal(result, swapped.loc[:, result.columns])
+
+        # Even without filling, the grouping column must never contain NaN
+        unfilled = merge_ordered(left, self.right, on="key", left_by="group")
+        assert unfilled["group"].notna().all()
+
+    def test_merge_type(self):
+        """``merge`` on a ``DataFrame`` subclass returns that subclass."""
+
+        class NotADataFrame(DataFrame):
+            @property
+            def _constructor(self):
+                return NotADataFrame
+
+        subclassed = NotADataFrame(self.left)
+        merged = subclassed.merge(self.right, on="key")
+
+        assert isinstance(merged, NotADataFrame)
+
+    def test_empty_sequence_concat(self):
+        """``concat`` rejects empty/all-``None`` input but accepts empty frames."""
+        # GH 9157
+        for nothing in ((), [], {}):
+            with pytest.raises(ValueError, match="[Nn]o objects"):
+                pd.concat(nothing)
+
+        for only_none in ([None], [None, None]):
+            with pytest.raises(ValueError, match="objects.*None"):
+                pd.concat(only_none)
+
+        # These must not raise: at least one real (empty) frame is present
+        for accepted in ([DataFrame()], [None, DataFrame()], [DataFrame(), None]):
+            pd.concat(accepted)
+
+    def test_doc_example(self):
+        """Grouped forward-filled merge; the join key is not passed explicitly."""
+        grouped_left = DataFrame(
+            {
+                "group": list("aaabbb"),
+                "key": ["a", "c", "e"] * 2,
+                "lvalue": [1, 2, 3] * 2,
+            }
+        )
+        plain_right = DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})
+
+        expected = DataFrame(
+            {
+                "group": list("aaaaabbbbb"),
+                "key": list("abcde") * 2,
+                "lvalue": [1, 1, 2, 2, 3] * 2,
+                "rvalue": [np.nan, 1, 2, 3, 3] * 2,
+            }
+        )
+
+        result = merge_ordered(
+            grouped_left, plain_right, fill_method="ffill", left_by="group"
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        ("left", "right", "on", "left_by", "right_by", "expected"),
+        [
+            # list-valued ``on`` together with a list-valued ``left_by``
+            (
+                DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
+                DataFrame({"T": [2], "E": [1]}),
+                ["T"],
+                ["G", "H"],
+                None,
+                DataFrame(
+                    {
+                        "G": ["g"] * 3,
+                        "H": ["h"] * 3,
+                        "T": [1, 2, 3],
+                        "E": [np.nan, 1.0, np.nan],
+                    }
+                ),
+            ),
+            # scalar ``on`` together with a list-valued ``left_by``
+            (
+                DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
+                DataFrame({"T": [2], "E": [1]}),
+                "T",
+                ["G", "H"],
+                None,
+                DataFrame(
+                    {
+                        "G": ["g"] * 3,
+                        "H": ["h"] * 3,
+                        "T": [1, 2, 3],
+                        "E": [np.nan, 1.0, np.nan],
+                    }
+                ),
+            ),
+            # operands swapped: list-valued ``right_by``
+            (
+                DataFrame({"T": [2], "E": [1]}),
+                DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
+                ["T"],
+                None,
+                ["G", "H"],
+                DataFrame(
+                    {
+                        "T": [1, 2, 3],
+                        "E": [np.nan, 1.0, np.nan],
+                        "G": ["g"] * 3,
+                        "H": ["h"] * 3,
+                    }
+                ),
+            ),
+        ],
+    )
+    def test_list_type_by(self, left, right, on, left_by, right_by, expected):
+        """``left_by``/``right_by`` may be given as a list of column names."""
+        # GH 35269
+        merged = merge_ordered(
+            left=left,
+            right=right,
+            on=on,
+            left_by=left_by,
+            right_by=right_by,
+        )
+
+        tm.assert_frame_equal(merged, expected)
+
+    def test_left_by_length_equals_to_right_shape0(self):
+        """Two ``left_by`` names and a two-column right frame merge correctly."""
+        # GH 38166
+        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=["G", "H", "E"])
+        right = DataFrame([[2, 1]], columns=["E", "T"])
+
+        expected = DataFrame(
+            {
+                "G": ["g"] * 3,
+                "H": ["h"] * 3,
+                "E": [1, 2, 3],
+                "T": [np.nan, 1.0, np.nan],
+            }
+        )
+
+        result = merge_ordered(left, right, on="E", left_by=["G", "H"])
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_elements_not_in_by_but_in_df(self):
+        """A ``left_by`` entry that is a value, not a column name, raises."""
+        # GH 38167
+        left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=["G", "H", "E"])
+        right = DataFrame([[2, 1]], columns=["E", "T"])
+
+        # "h" occurs as a value in column "H" but is not itself a column
+        with pytest.raises(KeyError, match=r"\{'h'\} not found in left columns"):
+            merge_ordered(left, right, on="E", left_by=["G", "h"])
--- a/dist/client/pandas/tests/reshape/merge/test_multi.py
+++ b/dist/client/pandas/tests/reshape/merge/test_multi.py
@@ -0,0 +1,909 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+)
+import pandas._testing as tm
+from pandas.core.reshape.concat import concat
+from pandas.core.reshape.merge import merge
+
+
+@pytest.fixture
+def left():
+    """Flat (not multi-indexed) left frame for the multi-index join tests.
+
+    Holds ten ``(key1, key2)`` label pairs plus a ``data`` column filled with
+    unseeded standard-normal draws.
+    """
+    key_pairs = [
+        ("bar", "two"),
+        ("bar", "one"),
+        ("bar", "three"),
+        ("foo", "one"),
+        ("foo", "two"),
+        ("baz", "one"),
+        ("baz", "two"),
+        ("qux", "two"),
+        ("qux", "three"),
+        ("snap", "one"),
+    ]
+    first_keys = [first for first, _ in key_pairs]
+    second_keys = [second for _, second in key_pairs]
+    noise = np.random.randn(len(key_pairs))
+
+    return DataFrame({"key1": first_keys, "key2": second_keys, "data": noise})
+
+
+@pytest.fixture
+def right(multiindex_dataframe_random_data):
+    """Multi-indexed right frame for the multi-index join tests.
+
+    Relabels the incoming fixture frame in place and returns that same object:
+    the columns become ``j_one``/``j_two``/``j_three`` and the index levels
+    ``key1``/``key2``.  The fixture itself is defined outside this block; the
+    assignments presuppose two index levels and three columns.
+    """
+    frame = multiindex_dataframe_random_data
+
+    frame.columns = ["j_one", "j_two", "j_three"]
+    frame.index.names = ["key1", "key2"]
+    return frame
+
+
+@pytest.fixture
+def left_multi():
+    """Five ``Trips`` counts indexed by Origin/Destination/Period/TripPurp."""
+    level_names = ["Origin", "Destination", "Period", "TripPurp"]
+    trips = DataFrame(
+        {
+            "Origin": ["A", "A", "B", "B", "C"],
+            "Destination": ["A", "B", "A", "C", "A"],
+            "Period": ["AM", "AM", "IP", "AM", "OP"],
+            "TripPurp": ["hbw", "nhb", "hbo", "nhb", "hbw"],
+            "Trips": [1987, 3647, 2470, 4296, 4444],
+        },
+        columns=level_names + ["Trips"],
+    )
+    return trips.set_index(level_names)
+
+
+@pytest.fixture
+def right_multi():
+    """Seven ``Distance`` values indexed by Origin/Destination/Period/LinkType."""
+    level_names = ["Origin", "Destination", "Period", "LinkType"]
+    distances = DataFrame(
+        {
+            "Origin": ["A", "A", "B", "B", "C", "C", "E"],
+            "Destination": ["A", "B", "A", "B", "A", "B", "F"],
+            "Period": ["AM", "AM", "IP", "AM", "OP", "IP", "AM"],
+            "LinkType": ["a", "b", "c", "b", "a", "b", "a"],
+            "Distance": [100, 80, 90, 80, 75, 35, 55],
+        },
+        columns=level_names + ["Distance"],
+    )
+    return distances.set_index(level_names)
+
+
+@pytest.fixture
+def on_cols_multi():
+    """Names of the three levels that ``left_multi`` and ``right_multi`` share."""
+    shared_levels = ["Origin", "Destination", "Period"]
+    return shared_levels
+
+
+@pytest.fixture
+def idx_cols_multi():
+    """Shared level names followed by the level unique to each multi frame."""
+    all_levels = ["Origin", "Destination", "Period", "TripPurp", "LinkType"]
+    return all_levels
+
+
+class TestMergeMulti:
+    """Tests for ``merge``/``join`` with several key columns or index levels."""
+
+    def test_merge_on_multikey(self, left, right, join_type):
+        """``join`` on two columns equals ``merge`` against the reset right frame.
+
+        Checked both with and without ``sort=True``.
+        """
+        on_cols = ["key1", "key2"]
+        result = left.join(right, on=on_cols, how=join_type).reset_index(drop=True)
+
+        expected = merge(left, right.reset_index(), on=on_cols, how=join_type)
+
+        tm.assert_frame_equal(result, expected)
+
+        result = left.join(right, on=on_cols, how=join_type, sort=True).reset_index(
+            drop=True
+        )
+
+        expected = merge(
+            left, right.reset_index(), on=on_cols, how=join_type, sort=True
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("sort", [False, True])
+    def test_left_join_multi_index(self, left, right, sort):
+        """Left join of a 5000-row frame on a three-level index of its own rows.
+
+        The ``left``/``right`` fixtures are requested but immediately replaced
+        by locally generated random frames.  Column ``"4th"`` encodes the three
+        key columns as a number and ``"5th"`` holds its negation on the right,
+        so a correct join must satisfy ``res["4th"] == -res["5th"]``.  The
+        check is repeated after NaNs are injected into the key columns.
+        """
+        icols = ["1st", "2nd", "3rd"]
+
+        def bind_cols(df):
+            # Fold the three key columns into one number per row.
+            def iord(a):
+                # NaN is the only value that differs from itself
+                return 0 if a != a else ord(a)
+
+            def f(ts):
+                return ts.map(iord) - ord("a")
+
+            return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 1e4
+
+        def run_asserts(left, right, sort):
+            res = left.join(right, on=icols, how="left", sort=sort)
+
+            assert len(left) < len(res) + 1
+            assert not res["4th"].isna().any()
+            assert not res["5th"].isna().any()
+
+            tm.assert_series_equal(res["4th"], -res["5th"], check_names=False)
+            result = bind_cols(res.iloc[:, :-2])
+            tm.assert_series_equal(res["4th"], result, check_names=False)
+            assert result.name is None
+
+            if sort:
+                tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort"))
+
+            out = merge(left, right.reset_index(), on=icols, sort=sort, how="left")
+
+            res.index = np.arange(len(res))
+            tm.assert_frame_equal(out, res)
+
+        lc = list(map(chr, np.arange(ord("a"), ord("z") + 1)))
+        left = DataFrame(np.random.choice(lc, (5000, 2)), columns=["1st", "3rd"])
+        left.insert(1, "2nd", np.random.randint(0, 1000, len(left)))
+
+        i = np.random.permutation(len(left))
+        right = left.iloc[i].copy()
+
+        left["4th"] = bind_cols(left)
+        right["5th"] = -bind_cols(right)
+        right.set_index(icols, inplace=True)
+
+        run_asserts(left, right, sort)
+
+        # inject some nulls
+        left.loc[1::23, "1st"] = np.nan
+        left.loc[2::37, "2nd"] = np.nan
+        left.loc[3::43, "3rd"] = np.nan
+        left["4th"] = bind_cols(left)
+
+        i = np.random.permutation(len(left))
+        # Copy explicitly (as done for the first ``right`` above) because the
+        # frame is mutated on the following lines.
+        right = left.iloc[i, :-1].copy()
+        right["5th"] = -bind_cols(right)
+        right.set_index(icols, inplace=True)
+
+        run_asserts(left, right, sort)
+
+    @pytest.mark.parametrize("sort", [False, True])
+    def test_merge_right_vs_left(self, left, right, sort):
+        """A left merge and the mirrored right merge produce the same frame."""
+        # compare left vs right merge with multikey
+        on_cols = ["key1", "key2"]
+        merged_left_right = left.merge(
+            right, left_on=on_cols, right_index=True, how="left", sort=sort
+        )
+
+        merge_right_left = right.merge(
+            left, right_on=on_cols, left_index=True, how="right", sort=sort
+        )
+
+        # Reorder columns
+        merge_right_left = merge_right_left[merged_left_right.columns]
+
+        tm.assert_frame_equal(merged_left_right, merge_right_left)
+
+    def test_merge_multiple_cols_with_mixed_cols_index(self):
+        """Merge a frame with a reset two-level Series on both level columns."""
+        # GH29522
+        s = Series(
+            range(6),
+            MultiIndex.from_product([["A", "B"], [1, 2, 3]], names=["lev1", "lev2"]),
+            name="Amount",
+        )
+        df = DataFrame({"lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], "col": 0})
+        result = merge(df, s.reset_index(), on=["lev1", "lev2"])
+        expected = DataFrame(
+            {
+                "lev1": list("AAABBB"),
+                "lev2": [1, 2, 3, 1, 2, 3],
+                "col": [0] * 6,
+                "Amount": range(6),
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_compress_group_combinations(self):
+        """Smoke test: an outer merge on two random-string key columns runs."""
+        # ~ 40000000 possible unique groups
+        key1 = tm.rands_array(10, 10000)
+        key1 = np.tile(key1, 2)
+        key2 = key1[::-1]
+
+        df = DataFrame({"key1": key1, "key2": key2, "value1": np.random.randn(20000)})
+
+        df2 = DataFrame(
+            {"key1": key1[::2], "key2": key2[::2], "value2": np.random.randn(10000)}
+        )
+
+        # just to hit the label compression code path
+        merge(df, df2, how="outer")
+
+    def test_left_join_index_preserve_order(self):
+        """A left join on a two-level index keeps the row order of the left frame.
+
+        Run twice: once with a single int64 value column and once with mixed
+        float32/int32 columns.  In both runs the ``sort=True`` join is compared
+        with a stable sort of the unsorted result.
+        """
+        on_cols = ["k1", "k2"]
+        left = DataFrame(
+            {
+                "k1": [0, 1, 2] * 8,
+                "k2": ["foo", "bar"] * 12,
+                "v": np.array(np.arange(24), dtype=np.int64),
+            }
+        )
+
+        index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")])
+        right = DataFrame({"v2": [5, 7]}, index=index)
+
+        result = left.join(right, on=on_cols)
+
+        expected = left.copy()
+        expected["v2"] = np.nan
+        expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5
+        expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7
+
+        tm.assert_frame_equal(result, expected)
+
+        result.sort_values(on_cols, kind="mergesort", inplace=True)
+        expected = left.join(right, on=on_cols, sort=True)
+
+        tm.assert_frame_equal(result, expected)
+
+        # test join with multi dtypes blocks
+        left = DataFrame(
+            {
+                "k1": [0, 1, 2] * 8,
+                "k2": ["foo", "bar"] * 12,
+                "k3": np.array([0, 1, 2] * 8, dtype=np.float32),
+                "v": np.array(np.arange(24), dtype=np.int32),
+            }
+        )
+
+        index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")])
+        right = DataFrame({"v2": [5, 7]}, index=index)
+
+        result = left.join(right, on=on_cols)
+
+        expected = left.copy()
+        expected["v2"] = np.nan
+        expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5
+        expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7
+
+        tm.assert_frame_equal(result, expected)
+
+        result = result.sort_values(on_cols, kind="mergesort")
+        expected = left.join(right, on=on_cols, sort=True)
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_left_join_index_multi_match_multiindex(self):
+        """Left join where index keys on the right match several left rows.
+
+        Every left row is repeated once per matching right row and keeps its
+        original index label; unmatched rows get NaN in ``val``.
+        """
+        left = DataFrame(
+            [
+                ["X", "Y", "C", "a"],
+                ["W", "Y", "C", "e"],
+                ["V", "Q", "A", "h"],
+                ["V", "R", "D", "i"],
+                ["X", "Y", "D", "b"],
+                ["X", "Y", "A", "c"],
+                ["W", "Q", "B", "f"],
+                ["W", "R", "C", "g"],
+                ["V", "Y", "C", "j"],
+                ["X", "Y", "B", "d"],
+            ],
+            columns=["cola", "colb", "colc", "tag"],
+            index=[3, 2, 0, 1, 7, 6, 4, 5, 9, 8],
+        )
+
+        right = DataFrame(
+            [
+                ["W", "R", "C", 0],
+                ["W", "Q", "B", 3],
+                ["W", "Q", "B", 8],
+                ["X", "Y", "A", 1],
+                ["X", "Y", "A", 4],
+                ["X", "Y", "B", 5],
+                ["X", "Y", "C", 6],
+                ["X", "Y", "C", 9],
+                ["X", "Q", "C", -6],
+                ["X", "R", "C", -9],
+                ["V", "Y", "C", 7],
+                ["V", "R", "D", 2],
+                ["V", "R", "D", -1],
+                ["V", "Q", "A", -3],
+            ],
+            columns=["col1", "col2", "col3", "val"],
+        ).set_index(["col1", "col2", "col3"])
+
+        result = left.join(right, on=["cola", "colb", "colc"], how="left")
+
+        expected = DataFrame(
+            [
+                ["X", "Y", "C", "a", 6],
+                ["X", "Y", "C", "a", 9],
+                ["W", "Y", "C", "e", np.nan],
+                ["V", "Q", "A", "h", -3],
+                ["V", "R", "D", "i", 2],
+                ["V", "R", "D", "i", -1],
+                ["X", "Y", "D", "b", np.nan],
+                ["X", "Y", "A", "c", 1],
+                ["X", "Y", "A", "c", 4],
+                ["W", "Q", "B", "f", 3],
+                ["W", "Q", "B", "f", 8],
+                ["W", "R", "C", "g", 0],
+                ["V", "Y", "C", "j", 7],
+                ["X", "Y", "B", "d", 5],
+            ],
+            columns=["cola", "colb", "colc", "tag", "val"],
+            index=[3, 3, 2, 0, 1, 1, 7, 6, 6, 4, 4, 5, 9, 8],
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+        result = left.join(right, on=["cola", "colb", "colc"], how="left", sort=True)
+
+        expected = expected.sort_values(["cola", "colb", "colc"], kind="mergesort")
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_left_join_index_multi_match(self):
+        """Single-key variant of the multi-match left join.
+
+        Also checks that ``merge(how="left")`` keeps the left frame order.
+        """
+        left = DataFrame(
+            [["c", 0], ["b", 1], ["a", 2], ["b", 3]],
+            columns=["tag", "val"],
+            index=[2, 0, 1, 3],
+        )
+
+        right = DataFrame(
+            [
+                ["a", "v"],
+                ["c", "w"],
+                ["c", "x"],
+                ["d", "y"],
+                ["a", "z"],
+                ["c", "r"],
+                ["e", "q"],
+                ["c", "s"],
+            ],
+            columns=["tag", "char"],
+        ).set_index("tag")
+
+        result = left.join(right, on="tag", how="left")
+
+        expected = DataFrame(
+            [
+                ["c", 0, "w"],
+                ["c", 0, "x"],
+                ["c", 0, "r"],
+                ["c", 0, "s"],
+                ["b", 1, np.nan],
+                ["a", 2, "v"],
+                ["a", 2, "z"],
+                ["b", 3, np.nan],
+            ],
+            columns=["tag", "val", "char"],
+            index=[2, 2, 2, 2, 0, 1, 1, 3],
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+        result = left.join(right, on="tag", how="left", sort=True)
+        expected2 = expected.sort_values("tag", kind="mergesort")
+
+        tm.assert_frame_equal(result, expected2)
+
+        # GH7331 - maintain left frame order in left merge
+        result = merge(left, right.reset_index(), how="left", on="tag")
+        expected.index = np.arange(len(expected))
+        tm.assert_frame_equal(result, expected)
+
+    def test_left_merge_na_buglet(self):
+        """Left merge on ``id`` when the right key column contains NaN.
+
+        The expected frame is the left frame joined positionally with the
+        right frame's non-key column.
+        """
+        left = DataFrame(
+            {
+                "id": list("abcde"),
+                "v1": np.random.randn(5),
+                "v2": np.random.randn(5),
+                "dummy": list("abcde"),
+                "v3": np.random.randn(5),
+            },
+            columns=["id", "v1", "v2", "dummy", "v3"],
+        )
+        right = DataFrame(
+            {
+                "id": ["a", "b", np.nan, np.nan, np.nan],
+                "sv3": [1.234, 5.678, np.nan, np.nan, np.nan],
+            }
+        )
+
+        result = merge(left, right, on="id", how="left")
+
+        rdf = right.drop(["id"], axis=1)
+        expected = left.join(rdf)
+        tm.assert_frame_equal(result, expected)
+
+    def test_merge_na_keys(self):
+        """Outer merge on all shared columns when one of them holds NaN.
+
+        The reference result replaces NaN by the sentinel ``-999`` before
+        merging and maps the sentinel back to NaN afterwards.
+        """
+        data = [
+            [1950, "A", 1.5],
+            [1950, "B", 1.5],
+            [1955, "B", 1.5],
+            [1960, "B", np.nan],
+            [1970, "B", 4.0],
+            [1950, "C", 4.0],
+            [1960, "C", np.nan],
+            [1965, "C", 3.0],
+            [1970, "C", 4.0],
+        ]
+
+        frame = DataFrame(data, columns=["year", "panel", "data"])
+
+        other_data = [
+            [1960, "A", np.nan],
+            [1970, "A", np.nan],
+            [1955, "A", np.nan],
+            [1965, "A", np.nan],
+            [1965, "B", np.nan],
+            [1955, "C", np.nan],
+        ]
+        other = DataFrame(other_data, columns=["year", "panel", "data"])
+
+        result = frame.merge(other, how="outer")
+
+        expected = frame.fillna(-999).merge(other.fillna(-999), how="outer")
+        expected = expected.replace(-999, np.nan)
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index])
+    def test_merge_datetime_index(self, klass):
+        """Merge on a key vector derived from a ``DatetimeIndex`` (its years).
+
+        ``klass`` optionally wraps the key vector for the first merge; the
+        second merge always passes ``df.index.year`` directly.
+        """
+        # see gh-19038
+        df = DataFrame(
+            [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"]
+        )
+        df.index = pd.to_datetime(df.index)
+        on_vector = df.index.year
+
+        if klass is not None:
+            on_vector = klass(on_vector)
+
+        expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]})
+
+        result = df.merge(df, on=["a", on_vector], how="inner")
+        tm.assert_frame_equal(result, expected)
+
+        expected = DataFrame(
+            {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]}
+        )
+
+        result = df.merge(df, on=[df.index.year], how="inner")
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("merge_type", ["left", "right"])
+    def test_merge_datetime_multi_index_empty_df(self, merge_type):
+        """Merge/join a (date, panel)-indexed frame with an empty frame.
+
+        The populated frame is on the preserved side in both variants, so all
+        of its rows are expected back with ``state`` set to ``None``.
+        """
+        # see gh-36895
+
+        left = DataFrame(
+            data={
+                "data": [1.5, 1.5],
+            },
+            index=MultiIndex.from_tuples(
+                [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]],
+                names=["date", "panel"],
+            ),
+        )
+
+        right = DataFrame(
+            index=MultiIndex.from_tuples([], names=["date", "panel"]), columns=["state"]
+        )
+
+        expected_index = MultiIndex.from_tuples(
+            [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]],
+            names=["date", "panel"],
+        )
+
+        if merge_type == "left":
+            expected = DataFrame(
+                data={
+                    "data": [1.5, 1.5],
+                    "state": [None, None],
+                },
+                index=expected_index,
+            )
+            results_merge = left.merge(right, how="left", on=["date", "panel"])
+            results_join = left.join(right, how="left")
+        else:
+            expected = DataFrame(
+                data={
+                    "state": [None, None],
+                    "data": [1.5, 1.5],
+                },
+                index=expected_index,
+            )
+            results_merge = right.merge(left, how="right", on=["date", "panel"])
+            results_join = right.join(left, how="right")
+
+        tm.assert_frame_equal(results_merge, expected)
+        tm.assert_frame_equal(results_join, expected)
+
+    @pytest.fixture
+    def household(self):
+        """Three households with ``male``/``wealth``, indexed by ``household_id``."""
+        household = DataFrame(
+            {
+                "household_id": [1, 2, 3],
+                "male": [0, 1, 0],
+                "wealth": [196087.3, 316478.7, 294750],
+            },
+            columns=["household_id", "male", "wealth"],
+        ).set_index("household_id")
+        return household
+
+    @pytest.fixture
+    def portfolio(self):
+        """Asset holdings indexed by ``(household_id, asset_id)``.
+
+        The last row (household 4) has NaN for both ``asset_id`` and ``name``.
+        """
+        portfolio = DataFrame(
+            {
+                "household_id": [1, 2, 2, 3, 3, 3, 4],
+                "asset_id": [
+                    "nl0000301109",
+                    "nl0000289783",
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "lu0197800237",
+                    "nl0000289965",
+                    np.nan,
+                ],
+                "name": [
+                    "ABN Amro",
+                    "Robeco",
+                    "Royal Dutch Shell",
+                    "Royal Dutch Shell",
+                    "AAB Eastern Europe Equity Fund",
+                    "Postbank BioTech Fonds",
+                    np.nan,
+                ],
+                "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0],
+            },
+            columns=["household_id", "asset_id", "name", "share"],
+        ).set_index(["household_id", "asset_id"])
+        return portfolio
+
+    @pytest.fixture
+    def expected(self):
+        """Expected inner join of ``household`` with ``portfolio``."""
+        expected = (
+            DataFrame(
+                {
+                    "male": [0, 1, 1, 0, 0, 0],
+                    "wealth": [
+                        196087.3,
+                        316478.7,
+                        316478.7,
+                        294750.0,
+                        294750.0,
+                        294750.0,
+                    ],
+                    "name": [
+                        "ABN Amro",
+                        "Robeco",
+                        "Royal Dutch Shell",
+                        "Royal Dutch Shell",
+                        "AAB Eastern Europe Equity Fund",
+                        "Postbank BioTech Fonds",
+                    ],
+                    "share": [1.00, 0.40, 0.60, 0.15, 0.60, 0.25],
+                    "household_id": [1, 2, 2, 3, 3, 3],
+                    "asset_id": [
+                        "nl0000301109",
+                        "nl0000289783",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "lu0197800237",
+                        "nl0000289965",
+                    ],
+                }
+            )
+            .set_index(["household_id", "asset_id"])
+            .reindex(columns=["male", "wealth", "name", "share"])
+        )
+        return expected
+
+    def test_join_multi_levels(self, portfolio, household, expected):
+        """Inner join of a single-level frame with a two-level frame."""
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        # GH 3662
+        # merge multi-levels
+        result = household.join(portfolio, how="inner")
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_levels_merge_equivalence(self, portfolio, household, expected):
+        """The same result is reachable via ``merge`` on the reset frames."""
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        # equivalency
+        result = merge(
+            household.reset_index(),
+            portfolio.reset_index(),
+            on=["household_id"],
+            how="inner",
+        ).set_index(["household_id", "asset_id"])
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_levels_outer(self, portfolio, household, expected):
+        """Outer join additionally keeps the ``(4, NaN)`` portfolio row."""
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        result = household.join(portfolio, how="outer")
+        expected = concat(
+            [
+                expected,
+                (
+                    DataFrame(
+                        {"share": [1.00]},
+                        index=MultiIndex.from_tuples(
+                            [(4, np.nan)], names=["household_id", "asset_id"]
+                        ),
+                    )
+                ),
+            ],
+            axis=0,
+            sort=True,
+        ).reindex(columns=expected.columns)
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_levels_invalid(self, portfolio, household):
+        """Joins that must fail raise ``ValueError`` with a specific message.
+
+        Covers (1) frames whose index names do not overlap at all and
+        (2) frames with overlapping column names and no suffixes given.
+        """
+        portfolio = portfolio.copy()
+        household = household.copy()
+
+        # invalid cases
+        household.index.name = "foo"
+
+        with pytest.raises(
+            ValueError, match="cannot join with no overlapping index names"
+        ):
+            household.join(portfolio, how="inner")
+
+        portfolio2 = portfolio.copy()
+        # ``set_names`` returns a new index unless ``inplace=True``; assign it
+        # back so the second level really is renamed (the result used to be
+        # discarded, leaving ``portfolio2`` an exact copy of ``portfolio``).
+        portfolio2.index = portfolio2.index.set_names(["household_id", "foo"])
+
+        with pytest.raises(ValueError, match="columns overlap but no suffix specified"):
+            portfolio2.join(portfolio, how="inner")
+
+    def test_join_multi_levels2(self):
+        """Inner and outer ``merge`` on ``asset_id`` of two reset multi-level frames."""
+        # some more advanced merges
+        # GH6360
+        household = DataFrame(
+            {
+                "household_id": [1, 2, 2, 3, 3, 3, 4],
+                "asset_id": [
+                    "nl0000301109",
+                    "nl0000301109",
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "lu0197800237",
+                    "nl0000289965",
+                    np.nan,
+                ],
+                "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0],
+            },
+            columns=["household_id", "asset_id", "share"],
+        ).set_index(["household_id", "asset_id"])
+
+        log_return = DataFrame(
+            {
+                "asset_id": [
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "gb00b03mlx29",
+                    "lu0197800237",
+                    "lu0197800237",
+                ],
+                "t": [233, 234, 235, 180, 181],
+                "log_return": [
+                    0.09604978,
+                    -0.06524096,
+                    0.03532373,
+                    0.03025441,
+                    0.036997,
+                ],
+            }
+        ).set_index(["asset_id", "t"])
+
+        expected = (
+            DataFrame(
+                {
+                    "household_id": [2, 2, 2, 3, 3, 3, 3, 3],
+                    "asset_id": [
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "lu0197800237",
+                        "lu0197800237",
+                    ],
+                    "t": [233, 234, 235, 233, 234, 235, 180, 181],
+                    "share": [0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6],
+                    "log_return": [
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.03025441,
+                        0.036997,
+                    ],
+                }
+            )
+            .set_index(["household_id", "asset_id", "t"])
+            .reindex(columns=["share", "log_return"])
+        )
+
+        # this is the equivalency
+        result = merge(
+            household.reset_index(),
+            log_return.reset_index(),
+            on=["asset_id"],
+            how="inner",
+        ).set_index(["household_id", "asset_id", "t"])
+        tm.assert_frame_equal(result, expected)
+
+        expected = (
+            DataFrame(
+                {
+                    "household_id": [1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4],
+                    "asset_id": [
+                        "nl0000301109",
+                        "nl0000301109",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "gb00b03mlx29",
+                        "lu0197800237",
+                        "lu0197800237",
+                        "nl0000289965",
+                        None,
+                    ],
+                    "t": [
+                        None,
+                        None,
+                        233,
+                        234,
+                        235,
+                        233,
+                        234,
+                        235,
+                        180,
+                        181,
+                        None,
+                        None,
+                    ],
+                    "share": [
+                        1.0,
+                        0.4,
+                        0.6,
+                        0.6,
+                        0.6,
+                        0.15,
+                        0.15,
+                        0.15,
+                        0.6,
+                        0.6,
+                        0.25,
+                        1.0,
+                    ],
+                    "log_return": [
+                        None,
+                        None,
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.09604978,
+                        -0.06524096,
+                        0.03532373,
+                        0.03025441,
+                        0.036997,
+                        None,
+                        None,
+                    ],
+                }
+            )
+            .set_index(["household_id", "asset_id", "t"])
+            .reindex(columns=["share", "log_return"])
+        )
+
+        result = merge(
+            household.reset_index(),
+            log_return.reset_index(),
+            on=["asset_id"],
+            how="outer",
+        ).set_index(["household_id", "asset_id", "t"])
+
+        tm.assert_frame_equal(result, expected)
+
+
+class TestJoinMultiMulti:
+    """Check ``DataFrame.join`` on MultiIndexed frames against ``merge``."""
+
+    def test_join_multi_multi(
+        self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
+    ):
+        # Reference result: merge the flattened frames on the shared columns,
+        # then restore the index levels and sort.
+        merged = merge(
+            left_multi.reset_index(),
+            right_multi.reset_index(),
+            how=join_type,
+            on=on_cols_multi,
+        )
+        expected = merged.set_index(idx_cols_multi).sort_index()
+
+        joined = left_multi.join(right_multi, how=join_type)
+        tm.assert_frame_equal(joined.sort_index(), expected)
+
+    def test_join_multi_empty_frames(
+        self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
+    ):
+        # Strip every data column so that only the index levels remain.
+        left_multi = left_multi.drop(columns=left_multi.columns)
+        right_multi = right_multi.drop(columns=right_multi.columns)
+
+        merged = merge(
+            left_multi.reset_index(),
+            right_multi.reset_index(),
+            how=join_type,
+            on=on_cols_multi,
+        )
+        expected = merged.set_index(idx_cols_multi).sort_index()
+
+        joined = left_multi.join(right_multi, how=join_type)
+        tm.assert_frame_equal(joined.sort_index(), expected)
+
+    @pytest.mark.parametrize("box", [None, np.asarray, Series, Index])
+    def test_merge_datetime_index(self, box):
+        # see gh-19038
+        df = DataFrame(
+            [1, 2, 3],
+            index=["2016-01-01", "2017-01-01", "2018-01-01"],
+            columns=["a"],
+        )
+        df.index = pd.to_datetime(df.index)
+
+        # The merge key is the year of the index, optionally wrapped by ``box``.
+        years = df.index.year
+        on_vector = years if box is None else box(years)
+
+        # column label plus an array-like key
+        result = df.merge(df, on=["a", on_vector], how="inner")
+        expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]})
+        tm.assert_frame_equal(result, expected)
+
+        # array-like key only; "a" is suffixed on both sides
+        result = df.merge(df, on=[df.index.year], how="inner")
+        expected = DataFrame(
+            {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]}
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_single_common_level(self):
+        # The two indexes share only the "key" level.
+        left_index = MultiIndex.from_tuples(
+            [("K0", "X0"), ("K0", "X1"), ("K1", "X2")], names=["key", "X"]
+        )
+        right_index = MultiIndex.from_tuples(
+            [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], names=["key", "Y"]
+        )
+
+        left = DataFrame(
+            {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=left_index
+        )
+        right = DataFrame(
+            {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]},
+            index=right_index,
+        )
+
+        result = left.join(right)
+
+        flat = merge(left.reset_index(), right.reset_index(), on=["key"], how="inner")
+        expected = flat.set_index(["key", "X", "Y"])
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_join_multi_wrong_order(self):
+        # GH 25760
+        # GH 28956
+
+        # Same level names on both sides, but listed in a different order.
+        left_index = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
+        right_index = MultiIndex.from_tuples(
+            [(4, 1), (3, 2), (3, 1)], names=["b", "a"]
+        )
+
+        left = DataFrame(index=left_index, data={"x": [10, 20, 30, 40]})
+        right = DataFrame(index=right_index, data={"y": ["foo", "bar", "fing"]})
+
+        expected = DataFrame(
+            index=left_index,
+            data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]},
+        )
+
+        tm.assert_frame_equal(left.join(right), expected)
--- a/dist/client/pandas/tests/reshape/test_crosstab.py
+++ b/dist/client/pandas/tests/reshape/test_crosstab.py
@@ -0,0 +1,827 @@
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.common import is_categorical_dtype
+
+import pandas as pd
+from pandas import (
+    CategoricalIndex,
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    crosstab,
+)
+import pandas._testing as tm
+
+
+class TestCrosstab:
+    def setup_method(self, method):
+        # Fixture data: an 11-row frame with three string key columns and
+        # three random float columns, stacked on top of itself so that every
+        # row appears twice in ``self.df``.
+        col_a = ["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3
+        col_b = [
+            "one", "one", "one", "two",
+            "one", "one", "one", "two",
+            "two", "two", "one",
+        ]
+        col_c = [
+            "dull", "dull", "shiny", "dull",
+            "dull", "shiny", "shiny", "dull",
+            "shiny", "shiny", "shiny",
+        ]
+
+        base = DataFrame(
+            {
+                "A": col_a,
+                "B": col_b,
+                "C": col_c,
+                "D": np.random.randn(11),
+                "E": np.random.randn(11),
+                "F": np.random.randn(11),
+            }
+        )
+
+        self.df = pd.concat([base, base], ignore_index=True)
+
+    def test_crosstab_single(self):
+        # One row key against one column key must match groupby-size-unstack.
+        frame = self.df
+        counts = frame.groupby(["A", "C"]).size().unstack()
+        expected = counts.fillna(0).astype(np.int64)
+
+        tm.assert_frame_equal(crosstab(frame["A"], frame["C"]), expected)
+
+    def test_crosstab_multiple(self):
+        frame = self.df
+        col_a, col_b, col_c = frame["A"], frame["B"], frame["C"]
+
+        # one row key, two column keys
+        sizes = frame.groupby(["A", "B", "C"]).size()
+        expected = sizes.unstack("B").unstack("C").fillna(0).astype(np.int64)
+        tm.assert_frame_equal(crosstab(col_a, [col_b, col_c]), expected)
+
+        # two row keys, one column key
+        sizes = frame.groupby(["B", "C", "A"]).size()
+        expected = sizes.unstack("A").fillna(0).astype(np.int64)
+        tm.assert_frame_equal(crosstab([col_b, col_c], col_a), expected)
+
+    @pytest.mark.parametrize("box", [np.array, list, tuple])
+    def test_crosstab_ndarray(self, box):
+        # GH 44076
+        # Array-likes produced by ``box`` must give the same table as the
+        # equivalent DataFrame columns.
+        a = box(np.random.randint(0, 5, size=100))
+        b = box(np.random.randint(0, 3, size=100))
+        c = box(np.random.randint(0, 10, size=100))
+
+        frame = DataFrame({"a": a, "b": b, "c": c})
+        col_a, col_b, col_c = frame["a"], frame["b"], frame["c"]
+
+        # explicit names: one row key, two column keys
+        tm.assert_frame_equal(
+            crosstab(a, [b, c], rownames=["a"], colnames=("b", "c")),
+            crosstab(col_a, [col_b, col_c]),
+        )
+
+        # explicit names: two row keys, one column key
+        tm.assert_frame_equal(
+            crosstab([b, c], a, colnames=["a"], rownames=("b", "c")),
+            crosstab([col_b, col_c], col_a),
+        )
+
+        # assign arbitrary names
+        expected = crosstab(col_a, col_c)
+        expected.index.names = ["row_0"]
+        expected.columns.names = ["col_0"]
+        tm.assert_frame_equal(crosstab(a, c), expected)
+
+    def test_crosstab_non_aligned(self):
+        # GH 17005
+        # ``b`` has more labels than ``a``; the expected table still holds
+        # only the three observations of ``a``.
+        a = Series([0, 1, 1], index=["a", "b", "c"])
+        b = Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"])
+        c = np.array([3, 4, 3])
+
+        expected = DataFrame(
+            [[1, 0], [1, 1]],
+            index=Index([0, 1], name="row_0"),
+            columns=Index([3, 4], name="col_0"),
+        )
+
+        # Series key with a longer index, then a plain ndarray key
+        for column_key in (b, c):
+            tm.assert_frame_equal(crosstab(a, column_key), expected)
+
+    def test_crosstab_margins(self):
+        # The "All" column and "All" row must equal the group sizes plus the
+        # total number of observations.
+        a = np.random.randint(0, 7, size=100)
+        b = np.random.randint(0, 3, size=100)
+        c = np.random.randint(0, 5, size=100)
+
+        frame = DataFrame({"a": a, "b": b, "c": c})
+        n_obs = len(frame)
+
+        result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True)
+
+        assert result.index.names == ("a",)
+        assert result.columns.names == ["b", "c"]
+
+        # margin column: per-"a" sizes followed by the grand total
+        row_sizes = frame.groupby(["a"]).size().astype("i8")
+        # the total is appended through an Index named "a" to keep index.name
+        grand_total = Series([n_obs], index=Index(["All"], name="a"))
+        exp_cols = pd.concat([row_sizes, grand_total])
+        exp_cols.name = ("All", "")
+        tm.assert_series_equal(result["All", ""], exp_cols)
+
+        # margin row: per-("b", "c") sizes followed by the grand total
+        all_rows = result.loc["All"]
+        col_sizes = frame.groupby(["b", "c"]).size().astype("i8")
+        exp_rows = pd.concat([col_sizes, Series([n_obs], index=[("All", "")])])
+        exp_rows.name = "All"
+        exp_rows = exp_rows.reindex(all_rows.index).fillna(0).astype(np.int64)
+        tm.assert_series_equal(all_rows, exp_rows)
+
+    def test_crosstab_margins_set_margin_name(self):
+        # GH 15972
+        # Margin checks with a custom ``margins_name``, followed by rejection
+        # of non-string margin names.
+        a = np.random.randint(0, 7, size=100)
+        b = np.random.randint(0, 3, size=100)
+        c = np.random.randint(0, 5, size=100)
+
+        frame = DataFrame({"a": a, "b": b, "c": c})
+        n_obs = len(frame)
+
+        result = crosstab(
+            a,
+            [b, c],
+            rownames=["a"],
+            colnames=("b", "c"),
+            margins=True,
+            margins_name="TOTAL",
+        )
+
+        assert result.index.names == ("a",)
+        assert result.columns.names == ["b", "c"]
+
+        # margin column: per-"a" sizes followed by the grand total
+        row_sizes = frame.groupby(["a"]).size().astype("i8")
+        # the total is appended through an Index named "a" to keep index.name
+        grand_total = Series([n_obs], index=Index(["TOTAL"], name="a"))
+        exp_cols = pd.concat([row_sizes, grand_total])
+        exp_cols.name = ("TOTAL", "")
+        tm.assert_series_equal(result["TOTAL", ""], exp_cols)
+
+        # margin row: per-("b", "c") sizes followed by the grand total
+        all_rows = result.loc["TOTAL"]
+        col_sizes = frame.groupby(["b", "c"]).size().astype("i8")
+        exp_rows = pd.concat([col_sizes, Series([n_obs], index=[("TOTAL", "")])])
+        exp_rows.name = "TOTAL"
+        exp_rows = exp_rows.reindex(all_rows.index).fillna(0).astype(np.int64)
+        tm.assert_series_equal(all_rows, exp_rows)
+
+        # anything that is not a string is rejected
+        msg = "margins_name argument must be a string"
+        for bad_name in (666, None, ["a", "b"]):
+            with pytest.raises(ValueError, match=msg):
+                crosstab(
+                    a,
+                    [b, c],
+                    rownames=["a"],
+                    colnames=("b", "c"),
+                    margins=True,
+                    margins_name=bad_name,
+                )
+
+    def test_crosstab_pass_values(self):
+        # crosstab with values/aggfunc must match the equivalent pivot_table.
+        a = np.random.randint(0, 7, size=100)
+        b = np.random.randint(0, 3, size=100)
+        c = np.random.randint(0, 5, size=100)
+        values = np.random.randn(100)
+
+        row_names = ["foo", "bar"]
+        table = crosstab(
+            [a, b], c, values, aggfunc=np.sum, rownames=row_names, colnames=["baz"]
+        )
+
+        frame = DataFrame({"foo": a, "bar": b, "baz": c, "values": values})
+        expected = frame.pivot_table(
+            "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum
+        )
+
+        tm.assert_frame_equal(table, expected)
+
+    def test_crosstab_dropna(self):
+        # GH 3820
+        # With dropna=False the ("one", "shiny") column is expected to be
+        # present although that pair never occurs in the data.
+        a = np.array(["foo"] * 3 + ["bar"] * 2 + ["foo"] * 2, dtype=object)
+        b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object)
+        c = np.array(["dull"] * 5 + ["shiny"] * 2, dtype=object)
+
+        table = crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False)
+
+        expected_columns = MultiIndex.from_product(
+            [["one", "two"], ["dull", "shiny"]], names=["b", "c"]
+        )
+        tm.assert_index_equal(table.columns, expected_columns)
+
+    def test_crosstab_no_overlap(self):
+        # GH 10291
+        # The two Series share no index labels, so the table is empty.
+        rows = Series([1, 2, 3], index=[1, 2, 3])
+        cols = Series([4, 5, 6], index=[4, 5, 6])
+
+        expected = DataFrame(
+            index=Index([], dtype="int64", name="row_0"),
+            columns=Index([], dtype="int64", name="col_0"),
+        )
+
+        tm.assert_frame_equal(crosstab(rows, cols), expected)
+
+    def test_margin_dropna(self):
+        # GH 12577
+        # pivot_table counts null into margin ('All')
+        # when margins=true and dropna=true
+
+        frame = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
+
+        # the row whose "a" is NaN must not be counted anywhere (total is 5)
+        expected = DataFrame(
+            [[1, 0, 1], [1, 3, 4], [2, 3, 5]],
+            index=Index([1.0, 2.0, "All"], name="a"),
+            columns=Index([3, 4, "All"], name="b"),
+        )
+
+        result = crosstab(frame.a, frame.b, margins=True, dropna=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_dropna2(self):
+        # NaN in both keys: only the two rows where both "a" and "b" are
+        # present are expected in the counts.
+        frame = DataFrame(
+            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
+        )
+
+        expected = DataFrame(
+            [[1, 0, 1], [0, 1, 1], [1, 1, 2]],
+            index=Index([1.0, 2.0, "All"], name="a"),
+            columns=Index([3.0, 4.0, "All"], name="b"),
+        )
+
+        result = crosstab(frame.a, frame.b, margins=True, dropna=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_dropna3(self):
+        # NaN only in the row key; "b" is complete and stays integer-labelled.
+        frame = DataFrame(
+            {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]}
+        )
+
+        expected = DataFrame(
+            [[1, 0, 1], [0, 1, 1], [1, 1, 2]],
+            index=Index([1.0, 2.0, "All"], name="a"),
+            columns=Index([3, 4, "All"], name="b"),
+        )
+
+        result = crosstab(frame.a, frame.b, margins=True, dropna=True)
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_dropna4(self):
+        # GH 12642
+        # _add_margins raises KeyError: Level None not found
+        # when margins=True and dropna=False
+        frame = DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]})
+
+        # the expected grand total (6) includes the row whose "a" is NaN
+        expected = DataFrame(
+            [[1, 0, 1], [1, 3, 4], [2, 4, 6]],
+            index=Index([1.0, 2.0, "All"], name="a"),
+            columns=Index([3, 4, "All"], name="b"),
+        )
+
+        result = crosstab(frame.a, frame.b, margins=True, dropna=False)
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_dropna5(self):
+        # NaN in both keys with dropna=False: the expected "All" row counts
+        # all six observations.
+        frame = DataFrame(
+            {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
+        )
+
+        expected = DataFrame(
+            [[1, 0, 1], [0, 1, 1], [1, 4, 6]],
+            index=Index([1.0, 2.0, "All"], name="a"),
+            columns=Index([3.0, 4.0, "All"], name="b"),
+        )
+
+        result = crosstab(frame.a, frame.b, margins=True, dropna=False)
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_dropna6(self):
+        # Margins with a NaN in one of several keys: ``b`` holds np.nan at
+        # position 5. Three layouts are checked against hand-written tables.
+        a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
+        b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
+        c = np.array(
+            ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object
+        )
+
+        # 1) ``b`` is part of the column keys, dropna=False:
+        #    the expected grand total is 7 (all observations).
+        actual = crosstab(
+            a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False
+        )
+        m = MultiIndex.from_arrays(
+            [
+                ["one", "one", "two", "two", "All"],
+                ["dull", "shiny", "dull", "shiny", ""],
+            ],
+            names=["b", "c"],
+        )
+        expected = DataFrame(
+            [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m
+        )
+        expected.index = Index(["bar", "foo", "All"], name="a")
+        tm.assert_frame_equal(actual, expected)
+
+        # 2) ``b`` is part of the row keys, dropna=False:
+        #    the expected grand total is again 7.
+        actual = crosstab(
+            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False
+        )
+        m = MultiIndex.from_arrays(
+            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
+            names=["a", "b"],
+        )
+        expected = DataFrame(
+            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m
+        )
+        expected.columns = Index(["dull", "shiny", "All"], name="c")
+        tm.assert_frame_equal(actual, expected)
+
+        # 3) same layout as 2) but dropna=True:
+        #    the expected grand total drops to 6.
+        actual = crosstab(
+            [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True
+        )
+        m = MultiIndex.from_arrays(
+            [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]],
+            names=["a", "b"],
+        )
+        expected = DataFrame(
+            [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m
+        )
+        expected.columns = Index(["dull", "shiny", "All"], name="c")
+        tm.assert_frame_equal(actual, expected)
+
+    def test_crosstab_normalize(self):
+        # Issue 12578
+        # Exercise every accepted ``normalize`` value, without and with margins.
+        # Column "c" is not used by any crosstab call in this test.
+        df = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
+        )
+
+        # Expected tables without margins: fractions of the grand total,
+        # of each row, and of each column respectively.
+        rindex = Index([1, 2], name="a")
+        cindex = Index([3, 4], name="b")
+        full_normal = DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex)
+        row_normal = DataFrame([[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex)
+        col_normal = DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex)
+
+        # Check all normalize args
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="all"), full_normal)
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize=True), full_normal)
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="index"), row_normal)
+        tm.assert_frame_equal(crosstab(df.a, df.b, normalize="columns"), col_normal)
+        # integer axis numbers must behave like their string aliases
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize=1),
+            crosstab(df.a, df.b, normalize="columns"),
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize=0), crosstab(df.a, df.b, normalize="index")
+        )
+
+        # Expected tables with margins=True. Per the literals below, the
+        # "index" case carries only an "All" row, the "columns" case only an
+        # "All" column, and the full case both.
+        row_normal_margins = DataFrame(
+            [[1.0, 0], [0.25, 0.75], [0.4, 0.6]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4], name="b", dtype="object"),
+        )
+        col_normal_margins = DataFrame(
+            [[0.5, 0, 0.2], [0.5, 1.0, 0.8]],
+            index=Index([1, 2], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b", dtype="object"),
+        )
+
+        all_normal_margins = DataFrame(
+            [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b", dtype="object"),
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize="columns", margins=True), col_normal_margins
+        )
+        tm.assert_frame_equal(
+            crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins
+        )
+
+    def test_crosstab_normalize_arrays(self):
+        # GH#12578
+        # Covers unnamed ndarray keys and normalization combined with an
+        # aggfunc ("count" and np.sum) and margins.
+        df = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
+        )
+
+        # Test arrays
+        # Previously the result of this call was discarded, so only "does not
+        # raise" was checked; the resulting table is now asserted as well.
+        result = crosstab(
+            [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2])
+        )
+        expected = DataFrame(
+            [[1, 0], [0, 1], [1, 0], [0, 1]],
+            index=MultiIndex.from_arrays(
+                [[1, 1, 2, 2], [1, 2, 1, 2]], names=["row_0", "row_1"]
+            ),
+            columns=Index([1, 2], name="col_0"),
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # Test with aggfunc
+        # "count" skips the NaN in "c", so 4 values are counted in total.
+        norm_counts = DataFrame(
+            [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b"),
+        )
+        test_case = crosstab(
+            df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True
+        )
+        tm.assert_frame_equal(test_case, norm_counts)
+
+        # Same keys with different values, aggregated with a sum.
+        df = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]}
+        )
+
+        norm_sum = DataFrame(
+            [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]],
+            index=Index([1, 2, "All"], name="a", dtype="object"),
+            columns=Index([3, 4, "All"], name="b", dtype="object"),
+        )
+        test_case = crosstab(
+            df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True
+        )
+        tm.assert_frame_equal(test_case, norm_sum)
+
+    def test_crosstab_with_empties(self, using_array_manager):
+        # Check handling of empties
+        # Every value in "c" is NaN, so "count" has nothing to count.
+        frame = DataFrame(
+            {
+                "a": [1, 2, 2, 2, 2],
+                "b": [3, 3, 4, 4, 4],
+                "c": [np.nan] * 5,
+            }
+        )
+        row_idx = Index([1, 2], name="a", dtype="int64")
+        col_idx = Index([3, 4], name="b")
+
+        # any normalization is expected to give an all-zero float table
+        empty = DataFrame([[0.0, 0.0], [0.0, 0.0]], index=row_idx, columns=col_idx)
+        for normalize in (True, "index", "columns"):
+            calculated = crosstab(
+                frame.a, frame.b, values=frame.c, aggfunc="count", normalize=normalize
+            )
+            tm.assert_frame_equal(empty, calculated)
+
+        # without normalization the never-observed (1, 4) cell is NaN
+        nans = DataFrame([[0.0, np.nan], [0.0, 0.0]], index=row_idx, columns=col_idx)
+        if using_array_manager:
+            # INFO(ArrayManager) column without NaNs can preserve int dtype
+            nans[3] = nans[3].astype("int64")
+
+        calculated = crosstab(
+            frame.a, frame.b, values=frame.c, aggfunc="count", normalize=False
+        )
+        tm.assert_frame_equal(nans, calculated)
+
+    def test_crosstab_errors(self):
+        # Issue 12578
+        # Invalid argument combinations must raise ValueError with the
+        # matching message.
+        frame = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
+        )
+        a, b, c = frame.a, frame.b, frame.c
+
+        with pytest.raises(
+            ValueError, match="values cannot be used without an aggfunc."
+        ):
+            crosstab(a, b, values=c)
+
+        with pytest.raises(ValueError, match="aggfunc cannot be used without values"):
+            crosstab(a, b, aggfunc=np.mean)
+
+        # neither an unknown string nor an unknown integer is accepted
+        for bad_normalize in ("42", 42):
+            with pytest.raises(ValueError, match="Not a valid normalize argument"):
+                crosstab(a, b, normalize=bad_normalize)
+
+        with pytest.raises(ValueError, match="Not a valid margins argument"):
+            crosstab(a, b, normalize="all", margins=42)
+
+    def test_crosstab_with_categorial_columns(self):
+        # GH 8860
+        # A categorical column key must keep its category order in the
+        # resulting columns.
+        categories = ["Sedan", "Electric", "Pickup"]
+        frame = DataFrame(
+            {
+                "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"],
+                "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"],
+            }
+        )
+        as_category = frame["MODEL"].astype("category")
+        frame["MODEL"] = as_category.cat.set_categories(categories)
+
+        expected = DataFrame(
+            [[2, 0, 0], [2, 0, 1], [0, 1, 0]],
+            index=Index(["Acura", "Honda", "Tesla"], name="MAKE"),
+            columns=CategoricalIndex(
+                categories, categories=categories, ordered=False, name="MODEL"
+            ),
+        )
+
+        tm.assert_frame_equal(crosstab(frame["MAKE"], frame["MODEL"]), expected)
+
+    def test_crosstab_with_numpy_size(self):
+        # GH 4003
+        # crosstab with aggfunc=np.size and margins, two row keys and one
+        # column key. Column "D" supplies the values; "E" is not referenced.
+        df = DataFrame(
+            {
+                "A": ["one", "one", "two", "three"] * 6,
+                "B": ["A", "B", "C"] * 8,
+                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+                "D": np.random.randn(24),
+                "E": np.random.randn(24),
+            }
+        )
+        result = crosstab(
+            index=[df["A"], df["B"]],
+            columns=[df["C"]],
+            margins=True,
+            aggfunc=np.size,
+            values=df["D"],
+        )
+        # The codes place the ("All", "") margin row last even though "All"
+        # and "" come first in their respective levels.
+        expected_index = MultiIndex(
+            levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]],
+            codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]],
+            names=["A", "B"],
+        )
+        expected_column = Index(["bar", "foo", "All"], dtype="object", name="C")
+        # NaN marks (A, B) / C combinations that never occur in ``df``.
+        expected_data = np.array(
+            [
+                [2.0, 2.0, 4.0],
+                [2.0, 2.0, 4.0],
+                [2.0, 2.0, 4.0],
+                [2.0, np.nan, 2.0],
+                [np.nan, 2.0, 2.0],
+                [2.0, np.nan, 2.0],
+                [np.nan, 2.0, 2.0],
+                [2.0, np.nan, 2.0],
+                [np.nan, 2.0, 2.0],
+                [12.0, 12.0, 24.0],
+            ]
+        )
+        expected = DataFrame(
+            expected_data, index=expected_index, columns=expected_column
+        )
+        # aggfunc is np.size, resulting in integers
+        expected["All"] = expected["All"].astype("int64")
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_duplicate_names(self):
+        # GH 13279 / 22529
+
+        s1 = Series(range(3), name="foo")
+        s2_foo = Series(range(1, 4), name="foo")
+        s2_bar = Series(range(1, 4), name="bar")
+        s3 = Series(range(3), name="waldo")
+
+        # Strategy: compute each table once with a clashing name ("foo" twice)
+        # and once with unique names, then relabel "bar" -> "foo" on the
+        # unique-name table and compare.
+        mapper = {"bar": "foo"}
+
+        # duplicate row, column labels
+        unique = crosstab(s1, s2_bar)
+        expected = unique.rename_axis(columns=mapper, axis=1)
+        tm.assert_frame_equal(crosstab(s1, s2_foo), expected)
+
+        # duplicate row, unique column labels
+        unique = crosstab([s1, s2_bar], s3)
+        expected = unique.rename_axis(index=mapper, axis=0)
+        tm.assert_frame_equal(crosstab([s1, s2_foo], s3), expected)
+
+        # unique row, duplicate column labels
+        unique = crosstab(s3, [s1, s2_bar])
+        expected = unique.rename_axis(columns=mapper, axis=1)
+        tm.assert_frame_equal(crosstab(s3, [s1, s2_foo]), expected)
+
+    @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]])
+    def test_crosstab_tuple_name(self, names):
+        # One of the two Series names is a tuple; it must be kept as the
+        # axis name of the table.
+        row_name, col_name = names[0], names[1]
+        rows = Series(range(3), name=row_name)
+        cols = Series(range(1, 4), name=col_name)
+
+        pairs = MultiIndex.from_arrays([range(3), range(1, 4)], names=names)
+        expected = Series(1, index=pairs).unstack(1, fill_value=0)
+
+        tm.assert_frame_equal(crosstab(rows, cols), expected)
+
+    def test_crosstab_both_tuple_names(self):
+        # GH 18321
+        # Both Series carry tuple names; identical values give an identity
+        # table.
+        row_name, col_name = ("a", "b"), ("c", "d")
+        rows = Series(range(3), name=row_name)
+        cols = Series(range(3), name=col_name)
+
+        expected = DataFrame(
+            np.eye(3, dtype="int64"),
+            index=Index(range(3), name=row_name),
+            columns=Index(range(3), name=col_name),
+        )
+
+        tm.assert_frame_equal(crosstab(rows, cols), expected)
+
+    def test_crosstab_unsorted_order(self):
+        # The input index is given as C, A, B; the expected table has its
+        # rows and columns in sorted order.
+        frame = DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"])
+
+        expected = DataFrame(
+            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+            index=Index(["A", "B", "C"], name="row_0"),
+            columns=MultiIndex.from_tuples(
+                [(1, 4), (2, 6), (3, 5)], names=["b", "a"]
+            ),
+        )
+
+        result = crosstab(frame.index, [frame.b, frame.a])
+        tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_normalize_multiple_columns(self):
+        # GH 15150
+        # Normalized sum with margins over two row keys, where every
+        # aggregated value ("D") is 0. Column "E" is not referenced.
+        df = DataFrame(
+            {
+                "A": ["one", "one", "two", "three"] * 6,
+                "B": ["A", "B", "C"] * 8,
+                "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+                "D": [0] * 24,
+                "E": [0] * 24,
+            }
+        )
+        result = crosstab(
+            [df.A, df.B],
+            df.C,
+            values=df.D,
+            aggfunc=np.sum,
+            normalize=True,
+            margins=True,
+        )
+        # Expected: a 10x3 float table of zeros, except for the last cell
+        # (("All", "") row, "All" column), which is 1.
+        expected = DataFrame(
+            np.array([0] * 29 + [1], dtype=float).reshape(10, 3),
+            columns=Index(["bar", "foo", "All"], dtype="object", name="C"),
+            index=MultiIndex.from_tuples(
+                [
+                    ("one", "A"),
+                    ("one", "B"),
+                    ("one", "C"),
+                    ("three", "A"),
+                    ("three", "B"),
+                    ("three", "C"),
+                    ("two", "A"),
+                    ("two", "B"),
+                    ("two", "C"),
+                    ("All", ""),
+                ],
+                names=["A", "B"],
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_normalize(self):
+        # GH 27500
+        # Margins with a custom margins_name ("Sub-Total") combined with each
+        # normalize mode. Columns "D" and "E" are not referenced by any
+        # crosstab call in this test.
+        df = DataFrame(
+            {
+                "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
+                "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
+                "C": [
+                    "small",
+                    "large",
+                    "large",
+                    "small",
+                    "small",
+                    "large",
+                    "small",
+                    "small",
+                    "large",
+                ],
+                "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
+                "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
+            }
+        )
+        # normalize on index
+        # (expected table has a "Sub-Total" row but no "Sub-Total" column)
+        result = crosstab(
+            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0
+        )
+        expected = DataFrame(
+            [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]]
+        )
+        # the codes put the ("Sub-Total", "") row last
+        expected.index = MultiIndex(
+            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
+            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
+            names=["A", "B"],
+        )
+        expected.columns = Index(["large", "small"], dtype="object", name="C")
+        tm.assert_frame_equal(result, expected)
+
+        # normalize on columns
+        # (expected table has a "Sub-Total" column but no "Sub-Total" row)
+        result = crosstab(
+            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1
+        )
+        expected = DataFrame(
+            [
+                [0.25, 0.2, 0.222222],
+                [0.25, 0.2, 0.222222],
+                [0.5, 0.2, 0.333333],
+                [0, 0.4, 0.222222],
+            ]
+        )
+        expected.columns = Index(
+            ["large", "small", "Sub-Total"], dtype="object", name="C"
+        )
+        expected.index = MultiIndex(
+            levels=[["bar", "foo"], ["one", "two"]],
+            codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
+            names=["A", "B"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # normalize on both index and column
+        # (expected table has both the "Sub-Total" row and column)
+        result = crosstab(
+            [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True
+        )
+        expected = DataFrame(
+            [
+                [0.111111, 0.111111, 0.222222],
+                [0.111111, 0.111111, 0.222222],
+                [0.222222, 0.111111, 0.333333],
+                [0.000000, 0.222222, 0.222222],
+                [0.444444, 0.555555, 1],
+            ]
+        )
+        expected.columns = Index(
+            ["large", "small", "Sub-Total"], dtype="object", name="C"
+        )
+        expected.index = MultiIndex(
+            levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]],
+            codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]],
+            names=["A", "B"],
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_margin_normalize_multiple_columns(self):
+        # GH 35144
+        # use multiple columns with margins and normalization
+        # (index is df.C, columns are [df.A, df.B], normalize=True, and the
+        # margin is named "margin")
+        df = DataFrame(
+            {
+                "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
+                "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
+                "C": [
+                    "small",
+                    "large",
+                    "large",
+                    "small",
+                    "small",
+                    "large",
+                    "small",
+                    "small",
+                    "large",
+                ],
+                "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
+                "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
+            }
+        )
+        result = crosstab(
+            index=df.C,
+            columns=[df.A, df.B],
+            margins=True,
+            margins_name="margin",
+            normalize=True,
+        )
+        # Last row and last column hold the margins; the bottom-right cell is 1.0.
+        expected = DataFrame(
+            [
+                [0.111111, 0.111111, 0.222222, 0.000000, 0.444444],
+                [0.111111, 0.111111, 0.111111, 0.222222, 0.555556],
+                [0.222222, 0.222222, 0.333333, 0.222222, 1.0],
+            ],
+            index=["large", "small", "margin"],
+        )
+        # The margin column is keyed ("margin", "") in the two-level columns.
+        expected.columns = MultiIndex(
+            levels=[["bar", "foo", "margin"], ["", "one", "two"]],
+            codes=[[0, 0, 1, 1, 2], [1, 2, 1, 2, 0]],
+            names=["A", "B"],
+        )
+        expected.index.name = "C"
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("a_dtype", ["category", "int64"])
+@pytest.mark.parametrize("b_dtype", ["category", "int64"])
+def test_categoricals(a_dtype, b_dtype):
+    # https://github.com/pandas-dev/pandas/issues/37465
+    # crosstab with margins=True and dropna=False on two Series that are cast
+    # to every combination of "category" and "int64".
+    g = np.random.RandomState(25982704)  # fixed seed: the counts below are pinned
+    a = Series(g.randint(0, 3, size=100)).astype(a_dtype)
+    b = Series(g.randint(0, 2, size=100)).astype(b_dtype)
+    result = crosstab(a, b, margins=True, dropna=False)
+    columns = Index([0, 1, "All"], dtype="object", name="col_0")
+    index = Index([0, 1, 2, "All"], dtype="object", name="row_0")
+    values = [[18, 16, 34], [18, 16, 34], [16, 16, 32], [52, 48, 100]]
+    expected = DataFrame(values, index, columns)
+    tm.assert_frame_equal(result, expected)
+
+    # Verify when categorical does not have all values present
+    a.loc[a == 1] = 2  # after this, the value 1 no longer occurs in a
+    a_is_cat = is_categorical_dtype(a.dtype)
+    # For a categorical a, 1 must still be listed by value_counts with count 0.
+    assert not a_is_cat or a.value_counts().loc[1] == 0
+    result = crosstab(a, b, margins=True, dropna=False)
+    values = [[18, 16, 34], [0, 0, 0], [34, 32, 66], [52, 48, 100]]
+    expected = DataFrame(values, index, columns)
+    if not a_is_cat:
+        # Non-categorical a: the expected frame has no row for the value 1.
+        expected = expected.loc[[0, 2, "All"]]
+        expected["All"] = expected["All"].astype("int64")
+    # NOTE(review): the repr() results are discarded; presumably these calls
+    # only check that building the repr does not raise -- confirm.
+    repr(result)
+    repr(expected)
+    repr(expected.loc[[0, 2, "All"]])
+    tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/test_cut.py
+++ b/dist/client/pandas/tests/reshape/test_cut.py
@@ -0,0 +1,746 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    Interval,
+    IntervalIndex,
+    Series,
+    TimedeltaIndex,
+    Timestamp,
+    cut,
+    date_range,
+    interval_range,
+    isna,
+    qcut,
+    timedelta_range,
+    to_datetime,
+)
+import pandas._testing as tm
+from pandas.api.types import CategoricalDtype as CDT
+import pandas.core.reshape.tile as tmod
+
+
+def test_simple():
+    # Five identical values cut into 4 bins with labels=False: every element
+    # is expected to get bin code 1 (the dtype is not compared).
+    ones = np.ones(5, dtype="int64")
+    codes = cut(ones, 4, labels=False)
+
+    tm.assert_numpy_array_equal(
+        codes, np.array([1, 1, 1, 1, 1]), check_dtype=False
+    )
+
+
+@pytest.mark.parametrize("func", [list, np.array])
+def test_bins(func):
+    # cut into 3 bins with retbins=True, for list and ndarray input: checks
+    # both the resulting categorical and the returned bin edges.
+    values = func([0.2, 1.4, 2.5, 6.2, 9.7, 2.1])
+    result, edges = cut(values, 3, retbins=True)
+
+    # Categories are built from the returned edges rounded to 3 decimals.
+    all_intervals = IntervalIndex.from_breaks(edges.round(3))
+    expected = Categorical(all_intervals.take([0, 0, 0, 1, 2, 0]), ordered=True)
+
+    tm.assert_categorical_equal(result, expected)
+    tm.assert_almost_equal(edges, np.array([0.1905, 3.36666667, 6.53333333, 9.7]))
+
+
+def test_right():
+    # right=True: 2.575 coincides with a returned bin edge and is expected in
+    # the first interval (index 0).
+    values = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
+    result, edges = cut(values, 4, right=True, retbins=True)
+
+    categories = IntervalIndex.from_breaks(edges.round(3))
+    expected = Categorical(categories, ordered=True).take([0, 0, 0, 2, 3, 0, 0])
+
+    tm.assert_categorical_equal(result, expected)
+    tm.assert_almost_equal(edges, np.array([0.1905, 2.575, 4.95, 7.325, 9.7]))
+
+
+def test_no_right():
+    # right=False: intervals are closed on the left, so 2.575 (a bin edge) is
+    # expected in the second interval (index 1).
+    values = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
+    result, edges = cut(values, 4, right=False, retbins=True)
+
+    left_closed = IntervalIndex.from_breaks(edges.round(3), closed="left")
+    expected = Categorical(left_closed.take([0, 0, 0, 2, 3, 0, 1]), ordered=True)
+
+    tm.assert_categorical_equal(result, expected)
+    tm.assert_almost_equal(edges, np.array([0.2, 2.575, 4.95, 7.325, 9.7095]))
+
+
+def test_bins_from_interval_index():
+    # Using the categories of an earlier cut as bins must reproduce that cut.
+    base = cut(range(5), 3)
+    roundtrip = cut(range(5), bins=base.categories)
+    tm.assert_categorical_equal(roundtrip, base)
+
+    # With range(6) the additional element is expected to get code -1.
+    codes_with_missing = np.append(base.codes, -1)
+    expected = Categorical.from_codes(
+        codes_with_missing, categories=base.categories, ordered=True
+    )
+    result = cut(range(6), bins=expected.categories)
+    tm.assert_categorical_equal(result, expected)
+
+
+def test_bins_from_interval_index_doc_example():
+    # Make sure we preserve the bins.
+    expected = IntervalIndex.from_tuples([(0, 18), (18, 35), (35, 70)])
+
+    ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60])
+    by_age = cut(ages, bins=[0, 18, 35, 70])
+    tm.assert_index_equal(by_age.categories, expected)
+
+    # Re-using those categories as bins keeps them and yields int8 codes.
+    result = cut([25, 20, 50], bins=by_age.categories)
+    tm.assert_index_equal(result.categories, expected)
+    expected_codes = np.array([1, 1, 2], dtype="int8")
+    tm.assert_numpy_array_equal(result.codes, expected_codes)
+
+
+def test_bins_not_overlapping_from_interval_index():
+    # see gh-23980
+    # An IntervalIndex with overlapping intervals is rejected as bins.
+    overlapping = IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)])
+
+    with pytest.raises(
+        ValueError, match="Overlapping IntervalIndex is not accepted"
+    ):
+        cut([5, 6], bins=overlapping)
+
+
+def test_bins_not_monotonic():
+    # Bin edges that go down (1.5 -> 1) must raise ValueError.
+    values = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]
+    bad_edges = [0.1, 1.5, 1, 10]
+
+    with pytest.raises(ValueError, match="bins must increase monotonically"):
+        cut(values, bad_edges)
+
+
+@pytest.mark.parametrize(
+    "x, bins, expected",
+    [
+        # datetime data, edges at Timestamp.min / Timestamp.max
+        (
+            date_range("2017-12-31", periods=3),
+            [Timestamp.min, Timestamp("2018-01-01"), Timestamp.max],
+            IntervalIndex.from_tuples(
+                [
+                    (Timestamp.min, Timestamp("2018-01-01")),
+                    (Timestamp("2018-01-01"), Timestamp.max),
+                ]
+            ),
+        ),
+        # integer data, edges at the int64 minimum and maximum
+        (
+            [-1, 0, 1],
+            np.array(
+                [np.iinfo(np.int64).min, 0, np.iinfo(np.int64).max], dtype="int64"
+            ),
+            IntervalIndex.from_tuples(
+                [(np.iinfo(np.int64).min, 0), (0, np.iinfo(np.int64).max)]
+            ),
+        ),
+        # timedelta64[ns] data, edges at -/+ the int64 maximum in nanoseconds
+        (
+            [
+                np.timedelta64(-1, "ns"),
+                np.timedelta64(0, "ns"),
+                np.timedelta64(1, "ns"),
+            ],
+            np.array(
+                [
+                    np.timedelta64(-np.iinfo(np.int64).max, "ns"),
+                    np.timedelta64(0, "ns"),
+                    np.timedelta64(np.iinfo(np.int64).max, "ns"),
+                ]
+            ),
+            IntervalIndex.from_tuples(
+                [
+                    (
+                        np.timedelta64(-np.iinfo(np.int64).max, "ns"),
+                        np.timedelta64(0, "ns"),
+                    ),
+                    (
+                        np.timedelta64(0, "ns"),
+                        np.timedelta64(np.iinfo(np.int64).max, "ns"),
+                    ),
+                ]
+            ),
+        ),
+    ],
+)
+def test_bins_monotonic_not_overflowing(x, bins, expected):
+    # GH 26045
+    # Increasing bin edges placed at the extremes of the value range must be
+    # accepted by cut and come back as the expected two categories.
+    result = cut(x, bins)
+    tm.assert_index_equal(result.categories, expected)
+
+
+def test_wrong_num_labels():
+    # Three labels for three bin edges (two bins) must raise ValueError.
+    values = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]
+    expected_error = "Bin labels must be one fewer than the number of bin edges"
+
+    with pytest.raises(ValueError, match=expected_error):
+        cut(values, [0, 1, 10], labels=["foo", "bar", "baz"])
+
+
+@pytest.mark.parametrize(
+    "x,bins,msg",
+    [
+        ([], 2, "Cannot cut empty array"),
+        ([1, 2, 3], 0.5, "`bins` should be a positive integer"),
+    ],
+)
+def test_cut_corner(x, bins, msg):
+    # Empty input and a non-integer bin count both raise ValueError with the
+    # parametrized message.
+    with pytest.raises(ValueError, match=msg):
+        cut(x, bins=bins)
+
+
+@pytest.mark.parametrize("arg", [2, np.eye(2), DataFrame(np.eye(2))])
+@pytest.mark.parametrize("cut_func", [cut, qcut])
+def test_cut_not_1d_arg(arg, cut_func):
+    # A scalar, a 2-D ndarray and a DataFrame are all rejected by cut and qcut.
+    with pytest.raises(ValueError, match="Input array must be 1 dimensional"):
+        cut_func(arg, 2)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [0, 1, 2, 3, 4, np.inf],
+        [-np.inf, 0, 1, 2, 3, 4],
+        [-np.inf, 0, 1, 2, 3, 4, np.inf],
+    ],
+)
+def test_int_bins_with_inf(data):
+    # GH 24314
+    # An integer bin count together with +/-inf in the data raises ValueError.
+    expected_error = (
+        "cannot specify integer `bins` when input data contains infinity"
+    )
+    with pytest.raises(ValueError, match=expected_error):
+        cut(data, bins=3)
+
+
+def test_cut_out_of_range_more():
+    # see gh-1511
+    # With bins [0, 1] and labels=False only the value 1 gets a code (0);
+    # every other value is expected to be NaN, and the Series name is kept.
+    values = Series([0, -1, 0, 1, -3], name="x")
+    codes = cut(values, [0, 1], labels=False)
+
+    expected = Series([np.nan, np.nan, np.nan, 0, np.nan], name="x")
+    tm.assert_series_equal(codes, expected)
+
+
+@pytest.mark.parametrize(
+    "right,breaks,closed",
+    [
+        (True, [-1e-3, 0.25, 0.5, 0.75, 1], "right"),
+        (False, [0, 0.25, 0.5, 0.75, 1 + 1e-3], "left"),
+    ],
+)
+def test_labels(right, breaks, closed):
+    # The categories produced by a 4-bin cut must match the parametrized
+    # breaks and closed side for right=True and right=False.
+    values = np.tile(np.arange(0, 1.01, 0.1), 4)
+
+    result, _ = cut(values, 4, right=right, retbins=True)
+    expected_categories = IntervalIndex.from_breaks(breaks, closed=closed)
+    tm.assert_index_equal(result.categories, expected_categories)
+
+
+def test_cut_pass_series_name_to_factor():
+    # The name of the input Series must be carried over to the result of cut.
+    name = "foo"
+    # Seeded generator so that every run bins the same data.
+    rng = np.random.RandomState(0)
+    ser = Series(rng.randn(100), name=name)
+
+    factor = cut(ser, 4)
+    assert factor.name == name
+
+
+def test_label_precision():
+    # precision=2 controls how the category edges are rounded.
+    values = np.arange(0, 0.73, 0.01)
+    expected_categories = IntervalIndex.from_breaks(
+        [-0.00072, 0.18, 0.36, 0.54, 0.72]
+    )
+
+    result = cut(values, 4, precision=2)
+    tm.assert_index_equal(result.categories, expected_categories)
+
+
+@pytest.mark.parametrize("labels", [None, False])
+def test_na_handling(labels):
+    # Every third input value is NaN; the result must be NaN at exactly
+    # those positions.
+    values = np.arange(0, 0.75, 0.01)
+    values[::3] = np.nan
+
+    result = np.asarray(cut(values, 4, labels=labels))
+
+    missing = isna(values)
+    expected = np.where(missing, np.nan, result)
+    tm.assert_almost_equal(result, expected)
+
+
+def test_inf_handling():
+    # Bin edges of -inf / +inf are accepted for ndarray and Series input.
+    values = np.arange(6)
+    values_ser = Series(values, dtype="int64")
+    edges = [-np.inf, 2, 4, np.inf]
+
+    from_array = cut(values, edges)
+    from_series = cut(values_ser, edges)
+
+    tm.assert_index_equal(from_array.categories, IntervalIndex.from_breaks(edges))
+
+    # First and last element fall into the two unbounded intervals.
+    lowest = Interval(-np.inf, 2)
+    highest = Interval(4, np.inf)
+    assert from_array[5] == highest
+    assert from_array[0] == lowest
+    assert from_series[5] == highest
+    assert from_series[0] == lowest
+
+
+def test_cut_out_of_bounds():
+    # Values below -1 or above 1 lie outside bins [-1, 0, 1] and must be the
+    # only missing entries in the result.
+    # Seeded generator so that every run bins the same data.
+    rng = np.random.RandomState(0)
+    arr = rng.randn(100)
+    result = cut(arr, [-1, 0, 1])
+
+    mask = isna(result)
+    ex_mask = (arr < -1) | (arr > 1)
+    tm.assert_numpy_array_equal(mask, ex_mask)
+
+
+@pytest.mark.parametrize(
+    "get_labels,get_expected",
+    [
+        # labels passed as a plain list -> ordered Categorical expected
+        (
+            lambda labels: labels,
+            lambda labels: Categorical(
+                ["Medium"] + 4 * ["Small"] + ["Medium", "Large"],
+                categories=labels,
+                ordered=True,
+            ),
+        ),
+        # labels passed as a Categorical built with from_codes; the expected
+        # value is built the same way, without ordered=True
+        (
+            lambda labels: Categorical.from_codes([0, 1, 2], labels),
+            lambda labels: Categorical.from_codes([1] + 4 * [0] + [1, 2], labels),
+        ),
+    ],
+)
+def test_cut_pass_labels(get_labels, get_expected):
+    # cut with explicit bin edges and user-supplied labels; get_labels wraps
+    # the label list and get_expected builds the matching expected result.
+    bins = [0, 25, 50, 100]
+    arr = [50, 5, 10, 15, 20, 30, 70]
+    labels = ["Small", "Medium", "Large"]
+
+    result = cut(arr, bins, labels=get_labels(labels))
+    tm.assert_categorical_equal(result, get_expected(labels))
+
+
+def test_cut_pass_labels_compat():
+    # see gh-16459
+    # A list of labels must behave like the equivalent ordered Categorical.
+    values = [50, 5, 10, 15, 20, 30, 70]
+    names = ["Good", "Medium", "Bad"]
+
+    from_list = cut(values, 3, labels=names)
+    ordered_labels = Categorical(names, categories=names, ordered=True)
+    from_categorical = cut(values, 3, labels=ordered_labels)
+    tm.assert_categorical_equal(from_list, from_categorical)
+
+
+@pytest.mark.parametrize("x", [np.arange(11.0), np.arange(11.0) / 1e10])
+def test_round_frac_just_works(x):
+    # Smoke test: nothing is asserted, cut only has to finish without raising.
+    cut(x, bins=2)
+
+
+@pytest.mark.parametrize(
+    "val,precision,expected",
+    [
+        (-117.9998, 3, -118),
+        (117.9998, 3, 118),
+        (117.9998, 2, 118),
+        (0.000123456, 2, 0.00012),
+    ],
+)
+def test_round_frac(val, precision, expected):
+    # see gh-1979
+    # Checks the private helper _round_frac of the tile module directly.
+    assert tmod._round_frac(val, precision=precision) == expected
+
+
+def test_cut_return_intervals():
+    # cut on a Series returns a Series of ordered categorical intervals.
+    values = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
+    result = cut(values, 3)
+
+    # Evenly spaced edges over [0, 8], with the lowest edge moved down by 0.008.
+    edges = np.linspace(0, 8, num=4).round(3)
+    edges[0] -= 0.008
+
+    categories = IntervalIndex.from_breaks(edges, closed="right")
+    per_value = categories.take([0, 0, 0, 1, 1, 1, 2, 2, 2])
+    expected = Series(per_value).astype(CDT(ordered=True))
+    tm.assert_series_equal(result, expected)
+
+
+def test_series_ret_bins():
+    # see gh-8589
+    # cut on a Series with retbins=True returns both the binned Series and
+    # the bin edges.
+    ser = Series(np.arange(4))
+    result, bins = cut(ser, 2, retbins=True)
+
+    expected = Series(
+        IntervalIndex.from_breaks([-0.003, 1.5, 3], closed="right").repeat(2)
+    ).astype(CDT(ordered=True))
+    tm.assert_series_equal(result, expected)
+
+    # Also check the returned edges, which were previously left unasserted:
+    # two equal-width bins over [0, 3] with the lowest edge lowered by 0.003.
+    tm.assert_almost_equal(bins, np.array([-0.003, 1.5, 3.0]))
+
+
+@pytest.mark.parametrize(
+    "kwargs,msg",
+    [
+        ({"duplicates": "drop"}, None),
+        ({}, "Bin edges must be unique"),
+        ({"duplicates": "raise"}, "Bin edges must be unique"),
+        ({"duplicates": "foo"}, "invalid value for 'duplicates' parameter"),
+    ],
+)
+def test_cut_duplicates_bin(kwargs, msg):
+    # see gh-20947
+    # Bin edges with a repeated value: msg is None when the call is expected
+    # to succeed, otherwise it is the expected ValueError message.
+    edges = [0, 2, 4, 6, 10, 10]
+    values = Series(np.array([1, 3, 5, 7, 9]), index=list("abcde"))
+
+    if msg is None:
+        # Success case: same outcome as cutting with the de-duplicated edges.
+        result = cut(values, edges, **kwargs)
+        expected = cut(values, pd.unique(edges))
+        tm.assert_series_equal(result, expected)
+        return
+
+    with pytest.raises(ValueError, match=msg):
+        cut(values, edges, **kwargs)
+
+
+@pytest.mark.parametrize("data", [9.0, -9.0, 0.0])
+@pytest.mark.parametrize("length", [1, 2])
+def test_single_bin(data, length):
+    # see gh-14652, gh-15428
+    # A single bin over constant data: every element gets code 0 (intp dtype).
+    constant = Series([data] * length)
+    result = cut(constant, 1, labels=False)
+
+    expected = Series(np.zeros(length, dtype=np.intp))
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "array_1_writeable,array_2_writeable", [(True, True), (True, False), (False, False)]
+)
+def test_cut_read_only(array_1_writeable, array_2_writeable):
+    # issue 18773
+    # The writeable flag of the bins array must not change the result.
+    first_bins = np.arange(0, 100, 10)
+    second_bins = np.arange(0, 100, 10)
+    first_bins.flags.writeable = array_1_writeable
+    second_bins.flags.writeable = array_2_writeable
+
+    values = np.arange(100)
+    with_first = cut(values, first_bins)
+    with_second = cut(values, second_bins)
+    tm.assert_categorical_equal(with_first, with_second)
+
+
+@pytest.mark.parametrize(
+    "conv",
+    [
+        lambda v: Timestamp(v),
+        lambda v: to_datetime(v),
+        lambda v: np.datetime64(v),
+        lambda v: Timestamp(v).to_pydatetime(),
+    ],
+)
+def test_datetime_bin(conv):
+    # Datetime bin edges produced by each converter must give the same
+    # Timestamp-based intervals.
+    bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"]
+    data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")]
+
+    lower, middle, upper = (Timestamp(v) for v in bin_data)
+    expected = Series(
+        IntervalIndex([Interval(lower, middle), Interval(middle, upper)])
+    ).astype(CDT(ordered=True))
+
+    converted_bins = [conv(v) for v in bin_data]
+    result = Series(cut(data, bins=converted_bins))
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        # the same three dates as a Series, a list, an ndarray and a DatetimeIndex
+        to_datetime(Series(["2013-01-01", "2013-01-02", "2013-01-03"])),
+        [
+            np.datetime64("2013-01-01"),
+            np.datetime64("2013-01-02"),
+            np.datetime64("2013-01-03"),
+        ],
+        np.array(
+            [
+                np.datetime64("2013-01-01"),
+                np.datetime64("2013-01-02"),
+                np.datetime64("2013-01-03"),
+            ]
+        ),
+        DatetimeIndex(["2013-01-01", "2013-01-02", "2013-01-03"]),
+    ],
+)
+def test_datetime_cut(data):
+    # see gh-14714
+    #
+    # Testing time data when it comes in various collection types.
+    # The returned bins are discarded; only the binned values are compared.
+    result, _ = cut(data, 3, retbins=True)
+    expected = Series(
+        IntervalIndex(
+            [
+                Interval(
+                    Timestamp("2012-12-31 23:57:07.200000"),
+                    Timestamp("2013-01-01 16:00:00"),
+                ),
+                Interval(
+                    Timestamp("2013-01-01 16:00:00"), Timestamp("2013-01-02 08:00:00")
+                ),
+                Interval(
+                    Timestamp("2013-01-02 08:00:00"), Timestamp("2013-01-03 00:00:00")
+                ),
+            ]
+        )
+    ).astype(CDT(ordered=True))
+    # Wrap in Series so every input container is compared the same way.
+    tm.assert_series_equal(Series(result), expected)
+
+
+@pytest.mark.parametrize(
+    "bins",
+    [
+        # either an integer bin count or explicit Timestamp edges
+        3,
+        [
+            Timestamp("2013-01-01 04:57:07.200000"),
+            Timestamp("2013-01-01 21:00:00"),
+            Timestamp("2013-01-02 13:00:00"),
+            Timestamp("2013-01-03 05:00:00"),
+        ],
+    ],
+)
+@pytest.mark.parametrize("box", [list, np.array, Index, Series])
+def test_datetime_tz_cut(bins, box):
+    # see gh-19872
+    # cut on a tz-aware Series; both bin specifications are expected to give
+    # the same tz-aware intervals.
+    tz = "US/Eastern"
+    s = Series(date_range("20130101", periods=3, tz=tz))
+
+    # Explicit edges are additionally wrapped in each container type.
+    if not isinstance(bins, int):
+        bins = box(bins)
+
+    result = cut(s, bins)
+    expected = Series(
+        IntervalIndex(
+            [
+                Interval(
+                    Timestamp("2012-12-31 23:57:07.200000", tz=tz),
+                    Timestamp("2013-01-01 16:00:00", tz=tz),
+                ),
+                Interval(
+                    Timestamp("2013-01-01 16:00:00", tz=tz),
+                    Timestamp("2013-01-02 08:00:00", tz=tz),
+                ),
+                Interval(
+                    Timestamp("2013-01-02 08:00:00", tz=tz),
+                    Timestamp("2013-01-03 00:00:00", tz=tz),
+                ),
+            ]
+        )
+    ).astype(CDT(ordered=True))
+    tm.assert_series_equal(result, expected)
+
+
+def test_datetime_nan_error():
+    # Integer bin edges on datetime data raise ValueError.
+    dates = date_range("20130101", periods=3)
+
+    with pytest.raises(ValueError, match="bins must be of datetime64 dtype"):
+        cut(dates, bins=[0, 2, 4])
+
+
+def test_datetime_nan_mask():
+    # Five dates against a single one-day bin: the one category is not NA and
+    # only the first value is expected to be binned.
+    values = date_range("20130102", periods=5)
+    edges = date_range("20130101", periods=2)
+    result = cut(values, bins=edges)
+
+    category_mask = result.categories.isna()
+    tm.assert_numpy_array_equal(category_mask, np.array([False]))
+
+    value_mask = result.isna()
+    tm.assert_numpy_array_equal(
+        value_mask, np.array([False, True, True, True, True])
+    )
+
+
+@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
+def test_datetime_cut_roundtrip(tz):
+    # see gh-19891
+    # The bins returned by retbins=True, fed back into cut, reproduce the
+    # first result.
+    dates = Series(date_range("20180101", periods=3, tz=tz))
+    result, result_bins = cut(dates, 2, retbins=True)
+
+    roundtrip = cut(dates, result_bins)
+    tm.assert_series_equal(result, roundtrip)
+
+    # The returned bins carry the same timezone as the input.
+    naive_bins = DatetimeIndex(
+        ["2017-12-31 23:57:07.200000", "2018-01-02 00:00:00", "2018-01-03 00:00:00"]
+    )
+    tm.assert_index_equal(result_bins, naive_bins.tz_localize(tz))
+
+
+def test_timedelta_cut_roundtrip():
+    # see gh-19891
+    # Same round trip as for datetimes, on timedelta data.
+    deltas = Series(timedelta_range("1day", periods=3))
+    result, result_bins = cut(deltas, 2, retbins=True)
+
+    roundtrip = cut(deltas, result_bins)
+    tm.assert_series_equal(result, roundtrip)
+
+    expected_bins = TimedeltaIndex(
+        ["0 days 23:57:07.200000", "2 days 00:00:00", "3 days 00:00:00"]
+    )
+    tm.assert_index_equal(result_bins, expected_bins)
+
+
+@pytest.mark.parametrize("bins", [6, 7])
+@pytest.mark.parametrize(
+    "box, compare",
+    [
+        (Series, tm.assert_series_equal),
+        (np.array, tm.assert_categorical_equal),
+        (list, tm.assert_equal),
+    ],
+)
+def test_cut_bool_coercion_to_int(bins, box, compare):
+    # issue 20303
+    # Boolean input must be binned exactly like the equivalent 0/1 integers.
+    pattern_int = [0, 1, 1, 0, 1]
+    pattern_bool = [False, True, True, False, True]
+    as_int = box(pattern_int * 10)
+    as_bool = box(pattern_bool * 10)
+
+    expected = cut(as_int, bins, duplicates="drop")
+    result = cut(as_bool, bins, duplicates="drop")
+    compare(result, expected)
+
+
+@pytest.mark.parametrize("labels", ["foo", 1, True])
+def test_cut_incorrect_labels(labels):
+    # GH 13318
+    # A string, an integer or True passed as labels raises ValueError.
+    expected_error = (
+        "Bin labels must either be False, None or passed in as a list-like argument"
+    )
+    with pytest.raises(ValueError, match=expected_error):
+        cut(range(5), 4, labels=labels)
+
+
+@pytest.mark.parametrize("bins", [3, [0, 5, 15]])
+@pytest.mark.parametrize("right", [True, False])
+@pytest.mark.parametrize("include_lowest", [True, False])
+def test_cut_nullable_integer(bins, right, include_lowest):
+    # cut on a nullable "Int64" array must match cut on the same values held
+    # in a float ndarray with NaN for the missing entries.
+    # Seeded generator so that every run bins the same data.
+    rng = np.random.RandomState(0)
+    a = rng.randint(0, 10, size=50).astype(float)
+    a[::2] = np.nan  # every other value is missing
+    result = cut(
+        pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest
+    )
+    expected = cut(a, bins, right=right, include_lowest=include_lowest)
+    tm.assert_categorical_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "data, bins, labels, expected_codes, expected_labels",
+    [
+        ([15, 17, 19], [14, 16, 18, 20], ["A", "B", "A"], [0, 1, 0], ["A", "B"]),
+        ([1, 3, 5], [0, 2, 4, 6, 8], [2, 0, 1, 2], [2, 0, 1], [0, 1, 2]),
+    ],
+)
+def test_cut_non_unique_labels(data, bins, labels, expected_codes, expected_labels):
+    # GH 33141
+    # With ordered=False the labels may repeat; the expected categories hold
+    # each label once.
+    expected = Categorical.from_codes(
+        expected_codes, categories=expected_labels, ordered=False
+    )
+    result = cut(data, bins=bins, labels=labels, ordered=False)
+    tm.assert_categorical_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "data, bins, labels, expected_codes, expected_labels",
+    [
+        ([15, 17, 19], [14, 16, 18, 20], ["C", "B", "A"], [0, 1, 2], ["C", "B", "A"]),
+        ([1, 3, 5], [0, 2, 4, 6, 8], [3, 0, 1, 2], [0, 1, 2], [3, 0, 1, 2]),
+    ],
+)
+def test_cut_unordered_labels(data, bins, labels, expected_codes, expected_labels):
+    # GH 33141
+    # With ordered=False the expected categories keep the order in which the
+    # labels were passed.
+    expected = Categorical.from_codes(
+        expected_codes, categories=expected_labels, ordered=False
+    )
+    result = cut(data, bins=bins, labels=labels, ordered=False)
+    tm.assert_categorical_equal(result, expected)
+
+
+def test_cut_unordered_with_missing_labels_raises_error():
+    # GH 33141
+    # ordered=False without labels raises ValueError.
+    expected_error = "'labels' must be provided if 'ordered = False'"
+    with pytest.raises(ValueError, match=expected_error):
+        cut([0.5, 3], bins=[0, 1, 2], ordered=False)
+
+
+def test_cut_unordered_with_series_labels():
+    # https://github.com/pandas-dev/pandas/issues/36603
+    # Bins and labels passed as Series, with ordered=False.
+    values = Series([1, 2, 3, 4, 5])
+    edges = Series([0, 2, 4, 6])
+    names = Series(["a", "b", "c"])
+
+    expected = Series(["a", "a", "b", "b", "c"], dtype="category")
+    result = cut(values, bins=edges, labels=names, ordered=False)
+    tm.assert_series_equal(result, expected)
+
+
+def test_cut_no_warnings():
+    # Assigning the result of a labelled cut to a new column must not emit
+    # any warning.
+    # Seeded generator so that every run bins the same data.
+    rng = np.random.RandomState(0)
+    df = DataFrame({"value": rng.randint(0, 100, 20)})
+    labels = [f"{i} - {i + 9}" for i in range(0, 100, 10)]
+    with tm.assert_produces_warning(False):
+        df["group"] = cut(df.value, range(0, 105, 10), right=False, labels=labels)
+
+
+def test_cut_with_duplicated_index_lowest_included():
+    # GH 42185
+    # include_lowest=True on a Series whose index contains the label 0 twice.
+    low = Interval(-0.001, 2, closed="right")
+    high = Interval(2, 4, closed="right")
+    expected = Series(
+        [low, low, low, high, low],
+        index=[0, 1, 2, 3, 0],
+        dtype="category",
+    ).cat.as_ordered()
+
+    values = Series([0, 1, 2, 3, 0], index=[0, 1, 2, 3, 0])
+    result = cut(values, bins=[0, 2, 4], include_lowest=True)
+    tm.assert_series_equal(result, expected)
+
+
+def test_cut_with_nonexact_categorical_indices():
+    # GH 42424
+    # Two value_counts results with different subsets of the same interval
+    # categories are combined into one DataFrame.
+
+    ser = Series(range(0, 100))
+    ser1 = cut(ser, 10).value_counts().head(5)  # first five bins
+    ser2 = cut(ser, 10).value_counts().tail(5)  # last five bins
+    result = DataFrame({"1": ser1, "2": ser2})
+
+    # The expected index holds all ten intervals as an ordered CategoricalIndex.
+    index = pd.CategoricalIndex(
+        [
+            Interval(-0.099, 9.9, closed="right"),
+            Interval(9.9, 19.8, closed="right"),
+            Interval(19.8, 29.7, closed="right"),
+            Interval(29.7, 39.6, closed="right"),
+            Interval(39.6, 49.5, closed="right"),
+            Interval(49.5, 59.4, closed="right"),
+            Interval(59.4, 69.3, closed="right"),
+            Interval(69.3, 79.2, closed="right"),
+            Interval(79.2, 89.1, closed="right"),
+            Interval(89.1, 99, closed="right"),
+        ],
+        ordered=True,
+    )
+
+    # Each column is NaN for the five bins that its source Series lacks.
+    expected = DataFrame(
+        {"1": [10] * 5 + [np.nan] * 5, "2": [np.nan] * 5 + [10] * 5}, index=index
+    )
+
+    tm.assert_frame_equal(expected, result)
+
+
+def test_cut_with_timestamp_tuple_labels():
+    # GH 40661
+    # Labels that are one-element tuples holding a Timestamp.
+    tuple_labels = [(Timestamp(nanos),) for nanos in (10, 20, 30)]
+    expected = Categorical.from_codes([0, 1, 2], tuple_labels, ordered=True)
+
+    result = cut([2, 4, 6], bins=[1, 3, 5, 7], labels=tuple_labels)
+    tm.assert_categorical_equal(result, expected)
+
+
+def test_cut_bins_datetime_intervalindex():
+    # https://github.com/pandas-dev/pandas/issues/46218
+    # Datetime interval_range used as bins for a Series of Timestamps.
+    start = Timestamp("2022-02-25")
+    end = Timestamp("2022-02-27")
+    bins = interval_range(start, end, freq="1D")
+    # passing Series instead of list is important to trigger bug
+    values = Series([Timestamp("2022-02-26")])
+    result = cut(values, bins=bins)
+    expected = Categorical.from_codes([0], bins, ordered=True)
+    tm.assert_categorical_equal(result.array, expected)
--- a/dist/client/pandas/tests/reshape/test_get_dummies.py
+++ b/dist/client/pandas/tests/reshape/test_get_dummies.py
@@ -0,0 +1,639 @@
+import re
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.common import is_integer_dtype
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    CategoricalIndex,
+    DataFrame,
+    Series,
+    get_dummies,
+)
+import pandas._testing as tm
+from pandas.core.arrays.sparse import (
+    SparseArray,
+    SparseDtype,
+)
+
+
+class TestGetDummies:
+    @pytest.fixture
+    def df(self):
+        """Three-row frame: string columns "A" and "B", integer column "C"."""
+        columns = {"A": ["a", "b", "a"], "B": ["b", "b", "c"], "C": [1, 2, 3]}
+        return DataFrame(columns)
+
+    @pytest.fixture(params=["uint8", "i8", np.float64, bool, None])
+    def dtype(self, request):
+        """Parametrized dtype, always handed out as a ``numpy.dtype`` instance."""
+        # NOTE(review): np.dtype(None) appears to resolve to numpy's default
+        # float dtype rather than None, so tests likely never receive None
+        # from this fixture -- confirm against the numpy dtype documentation.
+        requested = request.param
+        return np.dtype(requested)
+
+    @pytest.fixture(params=["dense", "sparse"])
+    def sparse(self, request):
+        """Boolean flag: True for the "sparse" parametrization, False for "dense"."""
+        # String params keep the generated test ids readable, e.g.
+        # TestGetDummies::test_basic[uint8-sparse] rather than [uint8-True].
+        is_sparse = request.param == "sparse"
+        return is_sparse
+
+    def effective_dtype(self, dtype):
+        """Return ``np.uint8`` when ``dtype`` is None, otherwise ``dtype`` unchanged."""
+        return np.uint8 if dtype is None else dtype
+
+    def test_get_dummies_raises_on_dtype_object(self, df):
+        """get_dummies rejects ``dtype="object"`` with a ValueError."""
+        expected_message = "dtype=object is not a valid dtype for get_dummies"
+        with pytest.raises(ValueError, match=expected_message):
+            get_dummies(df, dtype="object")
+
+    def test_get_dummies_basic(self, sparse, dtype):
+        """A list, a Series and an indexed Series of "a", "b", "c" encode alike."""
+        letters = list("abc")
+        as_series = Series(letters)
+        as_indexed_series = Series(letters, list("ABC"))
+
+        indicators = {"a": [1, 0, 0], "b": [0, 1, 0], "c": [0, 0, 1]}
+        expected = DataFrame(indicators, dtype=self.effective_dtype(dtype))
+        if sparse:
+            expected = expected.apply(SparseArray, fill_value=0.0)
+
+        # the plain list and the default-indexed Series are checked against
+        # the very same expected frame
+        for data in (letters, as_series):
+            result = get_dummies(data, sparse=sparse, dtype=dtype)
+            tm.assert_frame_equal(result, expected)
+
+        # with a labelled input the result is expected to carry those labels
+        expected.index = list("ABC")
+        result = get_dummies(as_indexed_series, sparse=sparse, dtype=dtype)
+        tm.assert_frame_equal(result, expected)
+
+    def test_get_dummies_basic_types(self, sparse, dtype):
+        """Check the dtypes of dummy columns for list, Series and DataFrame input.
+
+        Values are compared for the list/Series inputs; for the DataFrame
+        inputs only the per-dtype column counts of the result are compared.
+        """
+        # GH 10531
+        s_list = list("abc")
+        s_series = Series(s_list)
+        s_df = DataFrame(
+            {"a": [0, 1, 0, 1, 2], "b": ["A", "A", "B", "C", "C"], "c": [2, 3, 3, 3, 2]}
+        )
+
+        expected = DataFrame(
+            {"a": [1, 0, 0], "b": [0, 1, 0], "c": [0, 0, 1]},
+            dtype=self.effective_dtype(dtype),
+            columns=list("abc"),
+        )
+        if sparse:
+            # The fill value follows the kind of the requested dtype; it is
+            # reused further down when the expected sparse dtype name is built.
+            if is_integer_dtype(dtype):
+                fill_value = 0
+            elif dtype == bool:
+                fill_value = False
+            else:
+                fill_value = 0.0
+
+            expected = expected.apply(SparseArray, fill_value=fill_value)
+        result = get_dummies(s_list, sparse=sparse, dtype=dtype)
+        tm.assert_frame_equal(result, expected)
+
+        result = get_dummies(s_series, sparse=sparse, dtype=dtype)
+        tm.assert_frame_equal(result, expected)
+
+        # Encoding every column of s_df: "a" and "b" have three distinct
+        # values each and "c" has two, hence eight dummy columns in total.
+        result = get_dummies(s_df, columns=s_df.columns, sparse=sparse, dtype=dtype)
+        if sparse:
+            dtype_name = f"Sparse[{self.effective_dtype(dtype).name}, {fill_value}]"
+        else:
+            dtype_name = self.effective_dtype(dtype).name
+
+        expected = Series({dtype_name: 8})
+        result = result.dtypes.value_counts()
+        # dtype objects are turned into their string form so they can be
+        # compared against the plain-string keys of ``expected``
+        result.index = [str(i) for i in result.index]
+        tm.assert_series_equal(result, expected)
+
+        # Encoding only "a": three dummy columns plus the untouched "b"
+        # (object) and "c" (int64) columns.
+        result = get_dummies(s_df, columns=["a"], sparse=sparse, dtype=dtype)
+
+        expected_counts = {"int64": 1, "object": 1}
+        # .get(..., 0) adds to an existing entry when the dummy dtype name is
+        # itself one of the keys above (e.g. "int64")
+        expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0)
+
+        expected = Series(expected_counts).sort_index()
+        result = result.dtypes.value_counts()
+        result.index = [str(i) for i in result.index]
+        result = result.sort_index()
+        tm.assert_series_equal(result, expected)
+
+    def test_get_dummies_just_na(self, sparse):
+        """An all-NaN input gives an empty frame that keeps the input's index."""
+        na_only = [np.nan]
+        from_list = get_dummies(na_only, sparse=sparse)
+        from_series = get_dummies(Series(na_only), sparse=sparse)
+        from_indexed = get_dummies(Series(na_only, index=["A"]), sparse=sparse)
+
+        # no dummy columns are produced ...
+        assert from_list.empty
+        assert from_series.empty
+        assert from_indexed.empty
+
+        # ... while the row labels are still there
+        assert from_list.index.tolist() == [0]
+        assert from_series.index.tolist() == [0]
+        assert from_indexed.index.tolist() == ["A"]
+
+    def test_get_dummies_include_na(self, sparse, dtype):
+        """NaN is ignored by default and gets its own column with dummy_na=True."""
+        s = ["a", "b", np.nan]
+        # default: no column for NaN, and the NaN row is all zeros
+        res = get_dummies(s, sparse=sparse, dtype=dtype)
+        exp = DataFrame(
+            {"a": [1, 0, 0], "b": [0, 1, 0]}, dtype=self.effective_dtype(dtype)
+        )
+        if sparse:
+            exp = exp.apply(SparseArray, fill_value=0.0)
+        tm.assert_frame_equal(res, exp)
+
+        # Sparse dataframes do not allow nan labelled columns, see #GH8822
+        res_na = get_dummies(s, dummy_na=True, sparse=sparse, dtype=dtype)
+        exp_na = DataFrame(
+            {np.nan: [0, 0, 1], "a": [1, 0, 0], "b": [0, 1, 0]},
+            dtype=self.effective_dtype(dtype),
+        )
+        # the NaN column is expected last
+        exp_na = exp_na.reindex(["a", "b", np.nan], axis=1)
+        # hack (NaN handling in assert_index_equal)
+        exp_na.columns = res_na.columns
+        if sparse:
+            exp_na = exp_na.apply(SparseArray, fill_value=0.0)
+        tm.assert_frame_equal(res_na, exp_na)
+
+        # only the values are compared for the all-NaN input
+        res_just_na = get_dummies([np.nan], dummy_na=True, sparse=sparse, dtype=dtype)
+        exp_just_na = DataFrame(
+            Series(1, index=[0]), columns=[np.nan], dtype=self.effective_dtype(dtype)
+        )
+        tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values)
+
+    def test_get_dummies_unicode(self, sparse):
+        """Non-ASCII category values end up in the dummy column names."""
+        # See GH 6885 - get_dummies chokes on unicode values
+        import unicodedata
+
+        plain_e = "e"
+        e_acute = unicodedata.lookup("LATIN SMALL LETTER E WITH ACUTE")
+        result = get_dummies(
+            [plain_e, e_acute, e_acute], prefix="letter", sparse=sparse
+        )
+
+        expected = DataFrame(
+            {"letter_e": [1, 0, 0], f"letter_{e_acute}": [0, 1, 1]}, dtype=np.uint8
+        )
+        if sparse:
+            expected = expected.apply(SparseArray, fill_value=0)
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_all_obj(self, df, sparse):
+        """Both object columns are encoded into ``<column>_<value>`` indicators."""
+        result = get_dummies(df[["A", "B"]], sparse=sparse)
+
+        indicators = {
+            "A_a": [1, 0, 1],
+            "A_b": [0, 1, 0],
+            "B_b": [1, 1, 0],
+            "B_c": [0, 0, 1],
+        }
+        if sparse:
+            expected = DataFrame(
+                {
+                    name: SparseArray(flags, dtype="uint8")
+                    for name, flags in indicators.items()
+                }
+            )
+        else:
+            expected = DataFrame(indicators, dtype=np.uint8)
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
+        """Integer column "C" is kept first; "A" and "B" are encoded after it."""
+        result = get_dummies(df, sparse=sparse, dtype=dtype)
+
+        if sparse:
+            make_array, column_dtype = SparseArray, SparseDtype(dtype, 0)
+        else:
+            make_array, column_dtype = np.array, dtype
+
+        columns = {"C": [1, 2, 3]}
+        for name, flags in (
+            ("A_a", [1, 0, 1]),
+            ("A_b", [0, 1, 0]),
+            ("B_b", [1, 1, 0]),
+            ("B_c", [0, 0, 1]),
+        ):
+            columns[name] = make_array(flags, dtype=column_dtype)
+
+        expected = DataFrame(columns)[["C", "A_a", "A_b", "B_b", "B_c"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_prefix_list(self, df, sparse):
+        """A list of prefixes is applied, in order, to the encoded columns."""
+        prefixes = ["from_A", "from_B"]
+        result = get_dummies(df, prefix=prefixes, sparse=sparse)
+        expected = DataFrame(
+            {
+                "C": [1, 2, 3],
+                "from_A_a": [1, 0, 1],
+                "from_A_b": [0, 1, 0],
+                "from_B_b": [1, 1, 0],
+                "from_B_c": [0, 0, 1],
+            },
+            dtype=np.uint8,
+        )
+        # "C" was built as uint8 above; take it from the fixture frame instead
+        expected[["C"]] = df[["C"]]
+        cols = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"]
+        expected = expected[["C"] + cols]
+
+        # wrap the dummy columns in the container matching the sparse flag
+        typ = SparseArray if sparse else Series
+        expected[cols] = expected[cols].apply(lambda x: typ(x))
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_prefix_str(self, df, sparse):
+        """A single string prefix is shared by all encoded columns.
+
+        Both "A" and "B" contain the value "b", so the expected result has two
+        columns named "bad_b".
+        """
+        # not that you should do this...
+        result = get_dummies(df, prefix="bad", sparse=sparse)
+        bad_columns = ["bad_a", "bad_b", "bad_b", "bad_c"]
+        expected = DataFrame(
+            [[1, 1, 0, 1, 0], [2, 0, 1, 1, 0], [3, 1, 0, 0, 1]],
+            columns=["C"] + bad_columns,
+            dtype=np.uint8,
+        )
+        # only the dummy columns stay uint8; "C" is compared as int64
+        expected = expected.astype({"C": np.int64})
+        if sparse:
+            # work around astyping & assigning with duplicate columns
+            # https://github.com/pandas-dev/pandas/issues/14427
+            expected = pd.concat(
+                [
+                    Series([1, 2, 3], name="C"),
+                    Series([1, 0, 1], name="bad_a", dtype="Sparse[uint8]"),
+                    Series([0, 1, 0], name="bad_b", dtype="Sparse[uint8]"),
+                    Series([1, 1, 0], name="bad_b", dtype="Sparse[uint8]"),
+                    Series([0, 0, 1], name="bad_c", dtype="Sparse[uint8]"),
+                ],
+                axis=1,
+            )
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_subset(self, df, sparse):
+        """With ``columns=["A"]`` only "A" is encoded; "B" and "C" are kept."""
+        result = get_dummies(df, prefix=["from_A"], columns=["A"], sparse=sparse)
+        expected = DataFrame(
+            {
+                "B": ["b", "b", "c"],
+                "C": [1, 2, 3],
+                "from_A_a": [1, 0, 1],
+                "from_A_b": [0, 1, 0],
+            },
+        )
+        cols = expected.columns
+        # cast everything after "B" to uint8, then restore "C" from the fixture
+        expected[cols[1:]] = expected[cols[1:]].astype(np.uint8)
+        expected[["C"]] = df[["C"]]
+        if sparse:
+            cols = ["from_A_a", "from_A_b"]
+            expected[cols] = expected[cols].astype(SparseDtype("uint8", 0))
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_prefix_sep(self, df, sparse):
+        """prefix_sep may be a single string, a list, or a dict keyed by column."""
+        # one separator for every encoded column
+        result = get_dummies(df, prefix_sep="..", sparse=sparse)
+        expected = DataFrame(
+            {
+                "C": [1, 2, 3],
+                "A..a": [1, 0, 1],
+                "A..b": [0, 1, 0],
+                "B..b": [1, 1, 0],
+                "B..c": [0, 0, 1],
+            },
+            dtype=np.uint8,
+        )
+        # "C" was built as uint8 above; take it from the fixture frame instead
+        expected[["C"]] = df[["C"]]
+        expected = expected[["C", "A..a", "A..b", "B..b", "B..c"]]
+        if sparse:
+            cols = ["A..a", "A..b", "B..b", "B..c"]
+            expected[cols] = expected[cols].astype(SparseDtype("uint8", 0))
+
+        tm.assert_frame_equal(result, expected)
+
+        # one separator per encoded column, given as a list
+        result = get_dummies(df, prefix_sep=["..", "__"], sparse=sparse)
+        expected = expected.rename(columns={"B..b": "B__b", "B..c": "B__c"})
+        tm.assert_frame_equal(result, expected)
+
+        # the dict form is checked against the same renamed frame
+        result = get_dummies(df, prefix_sep={"A": "..", "B": "__"}, sparse=sparse)
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_prefix_bad_length(self, df, sparse):
+        """A prefix list shorter than the encoded columns raises ValueError."""
+        pattern = re.escape(
+            "Length of 'prefix' (1) did not match the length of the columns being "
+            "encoded (2)"
+        )
+        too_short = ["too few"]
+        with pytest.raises(ValueError, match=pattern):
+            get_dummies(df, prefix=too_short, sparse=sparse)
+
+    def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse):
+        """A prefix_sep list shorter than the encoded columns raises ValueError."""
+        pattern = re.escape(
+            "Length of 'prefix_sep' (1) did not match the length of the columns being "
+            "encoded (2)"
+        )
+        too_short = ["bad"]
+        with pytest.raises(ValueError, match=pattern):
+            get_dummies(df, prefix_sep=too_short, sparse=sparse)
+
+    def test_dataframe_dummies_prefix_dict(self, sparse):
+        """A dict prefix gives each encoded column its own prefix."""
+        frame = DataFrame({"C": [1, 2, 3], "A": ["a", "b", "a"], "B": ["b", "b", "c"]})
+        result = get_dummies(
+            frame, prefix={"A": "from_A", "B": "from_B"}, sparse=sparse
+        )
+
+        expected = DataFrame(
+            {
+                "C": [1, 2, 3],
+                "from_A_a": [1, 0, 1],
+                "from_A_b": [0, 1, 0],
+                "from_B_b": [1, 1, 0],
+                "from_B_c": [0, 0, 1],
+            }
+        )
+
+        # only the dummy columns are cast; "C" keeps the dtype it was built with
+        dummy_cols = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"]
+        expected[dummy_cols] = expected[dummy_cols].astype(np.uint8)
+        if sparse:
+            expected[dummy_cols] = expected[dummy_cols].astype(SparseDtype("uint8", 0))
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_with_na(self, df, sparse, dtype):
+        """dummy_na=True adds a ``<column>_nan`` indicator per encoded column."""
+        # append an all-NaN row to the fixture frame
+        df.loc[3, :] = [np.nan, np.nan, np.nan]
+        result = get_dummies(df, dummy_na=True, sparse=sparse, dtype=dtype).sort_index(
+            axis=1
+        )
+
+        if sparse:
+            arr = SparseArray
+            typ = SparseDtype(dtype, 0)
+        else:
+            arr = np.array
+            typ = dtype
+
+        expected = DataFrame(
+            {
+                "C": [1, 2, 3, np.nan],
+                "A_a": arr([1, 0, 1, 0], dtype=typ),
+                "A_b": arr([0, 1, 0, 0], dtype=typ),
+                "A_nan": arr([0, 0, 0, 1], dtype=typ),
+                "B_b": arr([1, 1, 0, 0], dtype=typ),
+                "B_c": arr([0, 0, 1, 0], dtype=typ),
+                "B_nan": arr([0, 0, 0, 1], dtype=typ),
+            }
+        ).sort_index(axis=1)
+
+        tm.assert_frame_equal(result, expected)
+
+        # without dummy_na the same frame minus the *_nan columns is expected
+        result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype)
+        expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
+        """A categorical column is encoded alongside the object columns."""
+        df["cat"] = Categorical(["x", "y", "y"])
+        result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1)
+
+        if sparse:
+            make_array, column_dtype = SparseArray, SparseDtype(dtype, 0)
+        else:
+            make_array, column_dtype = np.array, dtype
+
+        columns = {"C": [1, 2, 3]}
+        for name, flags in (
+            ("A_a", [1, 0, 1]),
+            ("A_b", [0, 1, 0]),
+            ("B_b", [1, 1, 0]),
+            ("B_c", [0, 0, 1]),
+            ("cat_x", [1, 0, 0]),
+            ("cat_y", [0, 1, 1]),
+        ):
+            columns[name] = make_array(flags, dtype=column_dtype)
+        expected = DataFrame(columns).sort_index(axis=1)
+
+        tm.assert_frame_equal(result, expected)
+
+    # Each case places a non-ASCII character in a different position: the
+    # column name, the cell value, the prefix, or the prefix separator.
+    @pytest.mark.parametrize(
+        "get_dummies_kwargs,expected",
+        [
+            (
+                {"data": DataFrame({"ä": ["a"]})},
+                DataFrame({"ä_a": [1]}, dtype=np.uint8),
+            ),
+            (
+                {"data": DataFrame({"x": ["ä"]})},
+                DataFrame({"x_ä": [1]}, dtype=np.uint8),
+            ),
+            (
+                {"data": DataFrame({"x": ["a"]}), "prefix": "ä"},
+                DataFrame({"ä_a": [1]}, dtype=np.uint8),
+            ),
+            (
+                {"data": DataFrame({"x": ["a"]}), "prefix_sep": "ä"},
+                DataFrame({"xäa": [1]}, dtype=np.uint8),
+            ),
+        ],
+    )
+    def test_dataframe_dummies_unicode(self, get_dummies_kwargs, expected):
+        """get_dummies called with the given kwargs returns the paired frame."""
+        # GH22084 get_dummies incorrectly encodes unicode characters
+        # in dataframe column names
+        result = get_dummies(**get_dummies_kwargs)
+        tm.assert_frame_equal(result, expected)
+
+    def test_get_dummies_basic_drop_first(self, sparse):
+        """drop_first=True leaves out the indicator of the first level ("a")."""
+        # GH12402 Add a new parameter `drop_first` to avoid collinearity
+        # Basic case
+        letters = list("abc")
+
+        expected = DataFrame({"b": [0, 1, 0], "c": [0, 0, 1]}, dtype=np.uint8)
+        if sparse:
+            expected = expected.apply(SparseArray, fill_value=0)
+
+        # list input and default-indexed Series share one expected frame
+        for data in (letters, Series(letters)):
+            result = get_dummies(data, drop_first=True, sparse=sparse)
+            tm.assert_frame_equal(result, expected)
+
+        # labelled input: the same frame, relabelled
+        labelled = Series(letters, list("ABC"))
+        expected.index = list("ABC")
+        result = get_dummies(labelled, drop_first=True, sparse=sparse)
+        tm.assert_frame_equal(result, expected)
+
+    def test_get_dummies_basic_drop_first_one_level(self, sparse):
+        """With a single level, drop_first=True leaves a frame with no columns."""
+        # Test the case that categorical variable only has one level.
+        same_letter = list("aaa")
+
+        no_columns = DataFrame(index=np.arange(3))
+        for data in (same_letter, Series(same_letter)):
+            result = get_dummies(data, drop_first=True, sparse=sparse)
+            tm.assert_frame_equal(result, no_columns)
+
+        labelled = Series(same_letter, list("ABC"))
+        no_columns_labelled = DataFrame(index=list("ABC"))
+        result = get_dummies(labelled, drop_first=True, sparse=sparse)
+        tm.assert_frame_equal(result, no_columns_labelled)
+
+    def test_get_dummies_basic_drop_first_NA(self, sparse):
+        """drop_first combined with NaN input, with and without dummy_na."""
+        # Test NA handling together with drop_first
+        s_NA = ["a", "b", np.nan]
+        # default: "a" is dropped and NaN gets no column
+        res = get_dummies(s_NA, drop_first=True, sparse=sparse)
+        exp = DataFrame({"b": [0, 1, 0]}, dtype=np.uint8)
+        if sparse:
+            exp = exp.apply(SparseArray, fill_value=0)
+
+        tm.assert_frame_equal(res, exp)
+
+        # dummy_na=True: "a" is still dropped, NaN gets the last column
+        res_na = get_dummies(s_NA, dummy_na=True, drop_first=True, sparse=sparse)
+        exp_na = DataFrame({"b": [0, 1, 0], np.nan: [0, 0, 1]}, dtype=np.uint8).reindex(
+            ["b", np.nan], axis=1
+        )
+        if sparse:
+            exp_na = exp_na.apply(SparseArray, fill_value=0)
+        tm.assert_frame_equal(res_na, exp_na)
+
+        # all-NaN input: a one-row frame without columns is expected
+        res_just_na = get_dummies(
+            [np.nan], dummy_na=True, drop_first=True, sparse=sparse
+        )
+        exp_just_na = DataFrame(index=np.arange(1))
+        tm.assert_frame_equal(res_just_na, exp_just_na)
+
+    def test_dataframe_dummies_drop_first(self, df, sparse):
+        """drop_first=True keeps one indicator per two-level object column."""
+        result = get_dummies(df[["A", "B"]], drop_first=True, sparse=sparse)
+
+        remaining = {"A_b": [0, 1, 0], "B_c": [0, 0, 1]}
+        expected = DataFrame(remaining, dtype=np.uint8)
+        if sparse:
+            expected = expected.apply(SparseArray, fill_value=0)
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse):
+        """drop_first drops the first level of object and categorical columns.
+
+        The ``dtype`` fixture is deliberately not requested: no dtype is passed
+        to get_dummies here, so parametrizing over it only repeated the
+        identical check once per dtype parameter.
+        """
+        df["cat"] = Categorical(["x", "y", "y"])
+        result = get_dummies(df, drop_first=True, sparse=sparse)
+        expected = DataFrame(
+            {"C": [1, 2, 3], "A_b": [0, 1, 0], "B_c": [0, 0, 1], "cat_y": [0, 1, 1]}
+        )
+        cols = ["A_b", "B_c", "cat_y"]
+        # only the dummy columns are expected as uint8; "C" keeps its dtype
+        expected[cols] = expected[cols].astype(np.uint8)
+        expected = expected[["C", "A_b", "B_c", "cat_y"]]
+        if sparse:
+            for col in cols:
+                expected[col] = SparseArray(expected[col])
+        tm.assert_frame_equal(result, expected)
+
+    def test_dataframe_dummies_drop_first_with_na(self, df, sparse):
+        """drop_first on a frame with an all-NaN row, with and without dummy_na."""
+        # append an all-NaN row to the fixture frame
+        df.loc[3, :] = [np.nan, np.nan, np.nan]
+        result = get_dummies(
+            df, dummy_na=True, drop_first=True, sparse=sparse
+        ).sort_index(axis=1)
+        expected = DataFrame(
+            {
+                "C": [1, 2, 3, np.nan],
+                "A_b": [0, 1, 0, 0],
+                "A_nan": [0, 0, 0, 1],
+                "B_c": [0, 0, 1, 0],
+                "B_nan": [0, 0, 0, 1],
+            }
+        )
+        cols = ["A_b", "A_nan", "B_c", "B_nan"]
+        # only the dummy columns are cast to uint8
+        expected[cols] = expected[cols].astype(np.uint8)
+        expected = expected.sort_index(axis=1)
+        if sparse:
+            for col in cols:
+                expected[col] = SparseArray(expected[col])
+
+        tm.assert_frame_equal(result, expected)
+
+        # without dummy_na the *_nan columns are not expected
+        result = get_dummies(df, dummy_na=False, drop_first=True, sparse=sparse)
+        expected = expected[["C", "A_b", "B_c"]]
+        tm.assert_frame_equal(result, expected)
+
+    def test_get_dummies_int_int(self):
+        """Integer and categorical Series use their values as column labels."""
+        int_result = get_dummies(Series([1, 2, 1]))
+        int_expected = DataFrame(
+            [[1, 0], [0, 1], [1, 0]], columns=[1, 2], dtype=np.uint8
+        )
+        tm.assert_frame_equal(int_result, int_expected)
+
+        cat_result = get_dummies(Series(Categorical(["a", "b", "a"])))
+        cat_expected = DataFrame(
+            [[1, 0], [0, 1], [1, 0]], columns=Categorical(["a", "b"]), dtype=np.uint8
+        )
+        tm.assert_frame_equal(cat_result, cat_expected)
+
+    def test_get_dummies_int_df(self, dtype):
+        """Only the columns listed in ``columns`` are encoded; the rest come first."""
+        frame = DataFrame(
+            {
+                "A": [1, 2, 1],
+                "B": Categorical(["a", "b", "a"]),
+                "C": [1, 2, 1],
+                "D": [1.0, 2.0, 1.0],
+            }
+        )
+        result = get_dummies(frame, columns=["A", "B"], dtype=dtype)
+
+        all_cols = ["C", "D", "A_1", "A_2", "B_a", "B_b"]
+        dummy_cols = all_cols[2:]
+        expected = DataFrame(
+            [[1, 1.0, 1, 0, 1, 0], [2, 2.0, 0, 1, 0, 1], [1, 1.0, 1, 0, 1, 0]],
+            columns=all_cols,
+        )
+        # only the dummy columns take the requested dtype
+        expected[dummy_cols] = expected[dummy_cols].astype(dtype)
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("ordered", [True, False])
+    def test_dataframe_dummies_preserve_categorical_dtype(self, dtype, ordered):
+        """Result columns are a CategoricalIndex with the input's categories."""
+        # GH13854
+        cat = Categorical(list("xy"), categories=list("xyz"), ordered=ordered)
+        result = get_dummies(cat, dtype=dtype)
+
+        target_dtype = self.effective_dtype(dtype)
+        # the unused category "z" still gets an (all-zero) column
+        flags = np.array([[1, 0, 0], [0, 1, 0]], dtype=target_dtype)
+        columns = CategoricalIndex(
+            cat.categories, categories=cat.categories, ordered=ordered
+        )
+        expected = DataFrame(flags, columns=columns, dtype=target_dtype)
+
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("sparse", [True, False])
+    def test_get_dummies_dont_sparsify_all_columns(self, sparse):
+        """The "GDP" column equals its reindexed copy after encoding "Nation"."""
+        # GH18914
+        raw = DataFrame.from_dict({"GDP": [1, 2], "Nation": ["AB", "CD"]})
+        encoded = get_dummies(raw, columns=["Nation"], sparse=sparse)
+        gdp_reindexed = encoded.reindex(columns=["GDP"])
+
+        tm.assert_frame_equal(encoded[["GDP"]], gdp_reindexed)
+
+    def test_get_dummies_duplicate_columns(self, df):
+        """Encoding works when all input columns carry the same label."""
+        # GH20839
+        df.columns = ["A", "A", "A"]
+        result = get_dummies(df).sort_index(axis=1)
+
+        rows = [[1, 1, 0, 1, 0], [2, 0, 1, 1, 0], [3, 1, 0, 0, 1]]
+        labels = ["A", "A_a", "A_b", "A_b", "A_c"]
+        expected = (
+            DataFrame(rows, columns=labels, dtype=np.uint8)
+            .sort_index(axis=1)
+            .astype({"A": np.int64})
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_get_dummies_all_sparse(self):
+        """Encoding the only column with sparse=True gives all-sparse output."""
+        frame = DataFrame({"A": [1, 2]})
+        result = get_dummies(frame, columns=["A"], sparse=True)
+
+        sparse_uint8 = SparseDtype("uint8", 0)
+        flags_by_column = {"A_1": [1, 0], "A_2": [0, 1]}
+        expected = DataFrame(
+            {
+                name: SparseArray(flags, dtype=sparse_uint8)
+                for name, flags in flags_by_column.items()
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize("values", ["baz"])
+    def test_get_dummies_with_string_values(self, values):
+        """Passing a bare string as ``columns`` raises TypeError."""
+        # issue #28383
+        frame = DataFrame(
+            {
+                "bar": [1, 2, 3, 4, 5, 6],
+                "foo": ["one", "one", "one", "two", "two", "two"],
+                "baz": ["A", "B", "C", "A", "B", "C"],
+                "zoo": ["x", "y", "z", "q", "w", "t"],
+            }
+        )
+        expected_message = "Input must be a list-like for parameter `columns`"
+
+        with pytest.raises(TypeError, match=expected_message):
+            get_dummies(frame, columns=values)
--- a/dist/client/pandas/tests/reshape/test_melt.py
+++ b/dist/client/pandas/tests/reshape/test_melt.py
--- a/dist/client/pandas/tests/reshape/test_pivot.py
+++ b/dist/client/pandas/tests/reshape/test_pivot.py
--- a/dist/client/pandas/tests/reshape/test_pivot_multilevel.py
+++ b/dist/client/pandas/tests/reshape/test_pivot_multilevel.py
@@ -0,0 +1,252 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    Index,
+    MultiIndex,
+)
+import pandas._testing as tm
+
+
+# Cases: a one-element list index and a two-element list index, each with an
+# explicit ``values`` column and with ``values=None``.
+@pytest.mark.parametrize(
+    "input_index, input_columns, input_values, "
+    "expected_values, expected_columns, expected_index",
+    [
+        (
+            ["lev4"],
+            "lev3",
+            "values",
+            [
+                [0.0, np.nan],
+                [np.nan, 1.0],
+                [2.0, np.nan],
+                [np.nan, 3.0],
+                [4.0, np.nan],
+                [np.nan, 5.0],
+                [6.0, np.nan],
+                [np.nan, 7.0],
+            ],
+            Index([1, 2], name="lev3"),
+            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
+        ),
+        (
+            ["lev4"],
+            "lev3",
+            None,
+            [
+                [1.0, np.nan, 1.0, np.nan, 0.0, np.nan],
+                [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0],
+                [1.0, np.nan, 2.0, np.nan, 2.0, np.nan],
+                [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0],
+                [2.0, np.nan, 1.0, np.nan, 4.0, np.nan],
+                [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0],
+                [2.0, np.nan, 2.0, np.nan, 6.0, np.nan],
+                [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0],
+            ],
+            MultiIndex.from_tuples(
+                [
+                    ("lev1", 1),
+                    ("lev1", 2),
+                    ("lev2", 1),
+                    ("lev2", 2),
+                    ("values", 1),
+                    ("values", 2),
+                ],
+                names=[None, "lev3"],
+            ),
+            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
+        ),
+        (
+            ["lev1", "lev2"],
+            "lev3",
+            "values",
+            [[0, 1], [2, 3], [4, 5], [6, 7]],
+            Index([1, 2], name="lev3"),
+            MultiIndex.from_tuples(
+                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
+            ),
+        ),
+        (
+            ["lev1", "lev2"],
+            "lev3",
+            None,
+            [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]],
+            MultiIndex.from_tuples(
+                [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)],
+                names=[None, "lev3"],
+            ),
+            MultiIndex.from_tuples(
+                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
+            ),
+        ),
+    ],
+)
+def test_pivot_list_like_index(
+    input_index,
+    input_columns,
+    input_values,
+    expected_values,
+    expected_columns,
+    expected_index,
+):
+    """DataFrame.pivot with a list passed as ``index``.
+
+    The ``input_*`` arguments are forwarded to ``pivot``; the ``expected_*``
+    arguments are the data, columns and index of the frame it must return.
+    """
+    # GH 21425, test when index is given a list
+    df = pd.DataFrame(
+        {
+            "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
+            "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
+            "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
+            "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
+            "values": [0, 1, 2, 3, 4, 5, 6, 7],
+        }
+    )
+
+    result = df.pivot(index=input_index, columns=input_columns, values=input_values)
+    expected = pd.DataFrame(
+        expected_values, columns=expected_columns, index=expected_index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+# Cases: ``columns`` given as a one-element list (with scalar and list index)
+# and as a two-element list (producing MultiIndex columns).
+@pytest.mark.parametrize(
+    "input_index, input_columns, input_values, "
+    "expected_values, expected_columns, expected_index",
+    [
+        (
+            "lev4",
+            ["lev3"],
+            "values",
+            [
+                [0.0, np.nan],
+                [np.nan, 1.0],
+                [2.0, np.nan],
+                [np.nan, 3.0],
+                [4.0, np.nan],
+                [np.nan, 5.0],
+                [6.0, np.nan],
+                [np.nan, 7.0],
+            ],
+            Index([1, 2], name="lev3"),
+            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
+        ),
+        (
+            ["lev1", "lev2"],
+            ["lev3"],
+            "values",
+            [[0, 1], [2, 3], [4, 5], [6, 7]],
+            Index([1, 2], name="lev3"),
+            MultiIndex.from_tuples(
+                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
+            ),
+        ),
+        (
+            ["lev1"],
+            ["lev2", "lev3"],
+            "values",
+            [[0, 1, 2, 3], [4, 5, 6, 7]],
+            MultiIndex.from_tuples(
+                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"]
+            ),
+            Index([1, 2], name="lev1"),
+        ),
+        (
+            ["lev1", "lev2"],
+            ["lev3", "lev4"],
+            "values",
+            [
+                [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+                [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan],
+                [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan],
+                [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0],
+            ],
+            MultiIndex.from_tuples(
+                [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)],
+                names=["lev3", "lev4"],
+            ),
+            MultiIndex.from_tuples(
+                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
+            ),
+        ),
+    ],
+)
+def test_pivot_list_like_columns(
+    input_index,
+    input_columns,
+    input_values,
+    expected_values,
+    expected_columns,
+    expected_index,
+):
+    """DataFrame.pivot with a list passed as ``columns``.
+
+    The ``input_*`` arguments are forwarded to ``pivot``; the ``expected_*``
+    arguments are the data, columns and index of the frame it must return.
+    """
+    # GH 21425, test when columns is given a list
+    df = pd.DataFrame(
+        {
+            "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
+            "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
+            "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
+            "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
+            "values": [0, 1, 2, 3, 4, 5, 6, 7],
+        }
+    )
+
+    result = df.pivot(index=input_index, columns=input_columns, values=input_values)
+    expected = pd.DataFrame(
+        expected_values, columns=expected_columns, index=expected_index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_pivot_multiindexed_rows_and_cols(using_array_manager):
+    """pivot_table on a frame whose rows and columns are both MultiIndexes."""
+    # GH 36360
+
+    column_index = MultiIndex.from_tuples(
+        [(0, 0), (0, 1), (0, 2)], names=["col_L0", "col_L1"]
+    )
+    row_index = MultiIndex.from_tuples(
+        [(0, 0, 0), (0, 0, 1), (1, 1, 1), (1, 0, 0)],
+        names=["idx_L0", "idx_L1", "idx_L2"],
+    )
+    frame = pd.DataFrame(
+        data=np.arange(12).reshape(4, 3), columns=column_index, index=row_index
+    )
+
+    result = frame.pivot_table(
+        index=["idx_L0"],
+        columns=["idx_L1"],
+        values=[(0, 1)],
+        aggfunc=lambda col: col.values.sum(),
+    )
+
+    expected = pd.DataFrame(
+        data=[[5, np.nan], [10, 7.0]],
+        columns=MultiIndex.from_tuples(
+            [(0, 1, 0), (0, 1, 1)], names=["col_L0", "col_L1", "idx_L1"]
+        ),
+        index=Index([0, 1], dtype="int64", name="idx_L0"),
+    )
+    if not using_array_manager:
+        # BlockManager does not preserve the dtypes
+        expected = expected.astype("float64")
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_pivot_df_multiindex_index_none():
+    """pivot with ``index=None`` keeps the frame's existing MultiIndex rows."""
+    # GH 23955
+    records = [
+        ["A", "A1", "label1", 1],
+        ["A", "A2", "label2", 2],
+        ["B", "A1", "label1", 3],
+        ["B", "A2", "label2", 4],
+    ]
+    frame = pd.DataFrame(
+        records, columns=["index_1", "index_2", "label", "value"]
+    ).set_index(["index_1", "index_2"])
+
+    result = frame.pivot(index=None, columns="label", values="value")
+
+    expected = pd.DataFrame(
+        [[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]],
+        index=frame.index,
+        columns=Index(["label1", "label2"], name="label"),
+    )
+    tm.assert_frame_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/test_qcut.py
+++ b/dist/client/pandas/tests/reshape/test_qcut.py
@@ -0,0 +1,302 @@
+import os
+
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    DatetimeIndex,
+    Interval,
+    IntervalIndex,
+    NaT,
+    Series,
+    TimedeltaIndex,
+    Timestamp,
+    cut,
+    date_range,
+    isna,
+    qcut,
+    timedelta_range,
+)
+import pandas._testing as tm
+from pandas.api.types import CategoricalDtype as CDT
+
+from pandas.tseries.offsets import (
+    Day,
+    Nano,
+)
+
+
+def test_qcut():
+    arr = np.random.randn(1000)
+
+    # We store the bins as Index that have been
+    # rounded, so comparisons are a bit tricky.
+    labels, _ = qcut(arr, 4, retbins=True)
+    ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0])
+
+    result = labels.categories.left.values
+    assert np.allclose(result, ex_bins[:-1], atol=1e-2)
+
+    result = labels.categories.right.values
+    assert np.allclose(result, ex_bins[1:], atol=1e-2)
+
+    ex_levels = cut(arr, ex_bins, include_lowest=True)
+    tm.assert_categorical_equal(labels, ex_levels)
+
+
+def test_qcut_bounds():
+    arr = np.random.randn(1000)
+
+    factor = qcut(arr, 10, labels=False)
+    assert len(np.unique(factor)) == 10
+
+
+def test_qcut_specify_quantiles():
+    arr = np.random.randn(100)
+    factor = qcut(arr, [0, 0.25, 0.5, 0.75, 1.0])
+
+    expected = qcut(arr, 4)
+    tm.assert_categorical_equal(factor, expected)
+
+
+def test_qcut_all_bins_same():
+    with pytest.raises(ValueError, match="edges.*unique"):
+        qcut([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)
+
+
+def test_qcut_include_lowest():
+    values = np.arange(10)
+    ii = qcut(values, 4)
+
+    ex_levels = IntervalIndex(
+        [
+            Interval(-0.001, 2.25),
+            Interval(2.25, 4.5),
+            Interval(4.5, 6.75),
+            Interval(6.75, 9),
+        ]
+    )
+    tm.assert_index_equal(ii.categories, ex_levels)
+
+
+def test_qcut_nas():
+    arr = np.random.randn(100)
+    arr[:20] = np.nan
+
+    result = qcut(arr, 4)
+    assert isna(result[:20]).all()
+
+
+def test_qcut_index():
+    result = qcut([0, 2], 2)
+    intervals = [Interval(-0.001, 1), Interval(1, 2)]
+
+    expected = Categorical(intervals, ordered=True)
+    tm.assert_categorical_equal(result, expected)
+
+
+def test_qcut_binning_issues(datapath):
+    # see gh-1978, gh-1979
+    cut_file = datapath(os.path.join("reshape", "data", "cut_data.csv"))
+    arr = np.loadtxt(cut_file)
+    result = qcut(arr, 20)
+
+    starts = []
+    ends = []
+
+    for lev in np.unique(result):
+        s = lev.left
+        e = lev.right
+        assert s != e
+
+        starts.append(float(s))
+        ends.append(float(e))
+
+    for (sp, sn), (ep, en) in zip(
+        zip(starts[:-1], starts[1:]), zip(ends[:-1], ends[1:])
+    ):
+        assert sp < sn
+        assert ep < en
+        assert ep <= sn
+
+
+def test_qcut_return_intervals():
+    ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
+    res = qcut(ser, [0, 0.333, 0.666, 1])
+
+    exp_levels = np.array(
+        [Interval(-0.001, 2.664), Interval(2.664, 5.328), Interval(5.328, 8)]
+    )
+    exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True))
+    tm.assert_series_equal(res, exp)
+
+
+@pytest.mark.parametrize("labels", ["foo", 1, True])
+def test_qcut_incorrect_labels(labels):
+    # GH 13318
+    values = range(5)
+    msg = "Bin labels must either be False, None or passed in as a list-like argument"
+    with pytest.raises(ValueError, match=msg):
+        qcut(values, 4, labels=labels)
+
+
+@pytest.mark.parametrize("labels", [["a", "b", "c"], list(range(3))])
+def test_qcut_wrong_length_labels(labels):
+    # GH 13318
+    values = range(10)
+    msg = "Bin labels must be one fewer than the number of bin edges"
+    with pytest.raises(ValueError, match=msg):
+        qcut(values, 4, labels=labels)
+
+
+@pytest.mark.parametrize(
+    "labels, expected",
+    [
+        (["a", "b", "c"], Categorical(["a", "b", "c"], ordered=True)),
+        (list(range(3)), Categorical([0, 1, 2], ordered=True)),
+    ],
+)
+def test_qcut_list_like_labels(labels, expected):
+    # GH 13318
+    values = range(3)
+    result = qcut(values, 3, labels=labels)
+    tm.assert_categorical_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "kwargs,msg",
+    [
+        ({"duplicates": "drop"}, None),
+        ({}, "Bin edges must be unique"),
+        ({"duplicates": "raise"}, "Bin edges must be unique"),
+        ({"duplicates": "foo"}, "invalid value for 'duplicates' parameter"),
+    ],
+)
+def test_qcut_duplicates_bin(kwargs, msg):
+    # see gh-7751
+    values = [0, 0, 0, 0, 1, 2, 3]
+
+    if msg is not None:
+        with pytest.raises(ValueError, match=msg):
+            qcut(values, 3, **kwargs)
+    else:
+        result = qcut(values, 3, **kwargs)
+        expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)])
+        tm.assert_index_equal(result.categories, expected)
+
+
+@pytest.mark.parametrize(
+    "data,start,end", [(9.0, 8.999, 9.0), (0.0, -0.001, 0.0), (-9.0, -9.001, -9.0)]
+)
+@pytest.mark.parametrize("length", [1, 2])
+@pytest.mark.parametrize("labels", [None, False])
+def test_single_quantile(data, start, end, length, labels):
+    # see gh-15431
+    ser = Series([data] * length)
+    result = qcut(ser, 1, labels=labels)
+
+    if labels is None:
+        intervals = IntervalIndex([Interval(start, end)] * length, closed="right")
+        expected = Series(intervals).astype(CDT(ordered=True))
+    else:
+        expected = Series([0] * length, dtype=np.intp)
+
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "ser",
+    [
+        Series(DatetimeIndex(["20180101", NaT, "20180103"])),
+        Series(TimedeltaIndex(["0 days", NaT, "2 days"])),
+    ],
+    ids=lambda x: str(x.dtype),
+)
+def test_qcut_nat(ser):
+    # see gh-19768
+    intervals = IntervalIndex.from_tuples(
+        [(ser[0] - Nano(), ser[2] - Day()), np.nan, (ser[2] - Day(), ser[2])]
+    )
+    expected = Series(Categorical(intervals, ordered=True))
+
+    result = qcut(ser, 2)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("bins", [3, np.linspace(0, 1, 4)])
+def test_datetime_tz_qcut(bins):
+    # see gh-19872
+    tz = "US/Eastern"
+    ser = Series(date_range("20130101", periods=3, tz=tz))
+
+    result = qcut(ser, bins)
+    expected = Series(
+        IntervalIndex(
+            [
+                Interval(
+                    Timestamp("2012-12-31 23:59:59.999999999", tz=tz),
+                    Timestamp("2013-01-01 16:00:00", tz=tz),
+                ),
+                Interval(
+                    Timestamp("2013-01-01 16:00:00", tz=tz),
+                    Timestamp("2013-01-02 08:00:00", tz=tz),
+                ),
+                Interval(
+                    Timestamp("2013-01-02 08:00:00", tz=tz),
+                    Timestamp("2013-01-03 00:00:00", tz=tz),
+                ),
+            ]
+        )
+    ).astype(CDT(ordered=True))
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "arg,expected_bins",
+    [
+        [
+            timedelta_range("1day", periods=3),
+            TimedeltaIndex(["1 days", "2 days", "3 days"]),
+        ],
+        [
+            date_range("20180101", periods=3),
+            DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"]),
+        ],
+    ],
+)
+def test_date_like_qcut_bins(arg, expected_bins):
+    # see gh-19891
+    ser = Series(arg)
+    result, result_bins = qcut(ser, 2, retbins=True)
+    tm.assert_index_equal(result_bins, expected_bins)
+
+
+@pytest.mark.parametrize("bins", [6, 7])
+@pytest.mark.parametrize(
+    "box, compare",
+    [
+        (Series, tm.assert_series_equal),
+        (np.array, tm.assert_categorical_equal),
+        (list, tm.assert_equal),
+    ],
+)
+def test_qcut_bool_coercion_to_int(bins, box, compare):
+    # issue 20303
+    data_expected = box([0, 1, 1, 0, 1] * 10)
+    data_result = box([False, True, True, False, True] * 10)
+    expected = qcut(data_expected, bins, duplicates="drop")
+    result = qcut(data_result, bins, duplicates="drop")
+    compare(result, expected)
+
+
+@pytest.mark.parametrize("q", [2, 5, 10])
+def test_qcut_nullable_integer(q, any_numeric_ea_dtype):
+    arr = pd.array(np.arange(100), dtype=any_numeric_ea_dtype)
+    arr[::2] = pd.NA
+
+    result = qcut(arr, q)
+    expected = qcut(arr.astype(float), q)
+
+    tm.assert_categorical_equal(result, expected)
--- a/dist/client/pandas/tests/reshape/test_union_categoricals.py
+++ b/dist/client/pandas/tests/reshape/test_union_categoricals.py
@@ -0,0 +1,354 @@
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.concat import union_categoricals
+
+import pandas as pd
+from pandas import (
+    Categorical,
+    CategoricalIndex,
+    Series,
+)
+import pandas._testing as tm
+
+
+class TestUnionCategoricals:
+    def test_union_categorical(self):
+        # GH 13361
+        data = [
+            (list("abc"), list("abd"), list("abcabd")),
+            ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]),
+            ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]),
+            (
+                ["b", "b", np.nan, "a"],
+                ["a", np.nan, "c"],
+                ["b", "b", np.nan, "a", "a", np.nan, "c"],
+            ),
+            (
+                pd.date_range("2014-01-01", "2014-01-05"),
+                pd.date_range("2014-01-06", "2014-01-07"),
+                pd.date_range("2014-01-01", "2014-01-07"),
+            ),
+            (
+                pd.date_range("2014-01-01", "2014-01-05", tz="US/Central"),
+                pd.date_range("2014-01-06", "2014-01-07", tz="US/Central"),
+                pd.date_range("2014-01-01", "2014-01-07", tz="US/Central"),
+            ),
+            (
+                pd.period_range("2014-01-01", "2014-01-05"),
+                pd.period_range("2014-01-06", "2014-01-07"),
+                pd.period_range("2014-01-01", "2014-01-07"),
+            ),
+        ]
+
+        for a, b, combined in data:
+            for box in [Categorical, CategoricalIndex, Series]:
+                result = union_categoricals([box(Categorical(a)), box(Categorical(b))])
+                expected = Categorical(combined)
+                tm.assert_categorical_equal(result, expected)
+
+        # new categories ordered by appearance
+        s = Categorical(["x", "y", "z"])
+        s2 = Categorical(["a", "b", "c"])
+        result = union_categoricals([s, s2])
+        expected = Categorical(
+            ["x", "y", "z", "a", "b", "c"], categories=["x", "y", "z", "a", "b", "c"]
+        )
+        tm.assert_categorical_equal(result, expected)
+
+        s = Categorical([0, 1.2, 2], ordered=True)
+        s2 = Categorical([0, 1.2, 2], ordered=True)
+        result = union_categoricals([s, s2])
+        expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True)
+        tm.assert_categorical_equal(result, expected)
+
+        # must exactly match types
+        s = Categorical([0, 1.2, 2])
+        s2 = Categorical([2, 3, 4])
+        msg = "dtype of categories must be the same"
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([s, s2])
+
+        msg = "No Categoricals to union"
+        with pytest.raises(ValueError, match=msg):
+            union_categoricals([])
+
+    def test_union_categoricals_nan(self):
+        # GH 13759
+        res = union_categoricals(
+            [Categorical([1, 2, np.nan]), Categorical([3, 2, np.nan])]
+        )
+        exp = Categorical([1, 2, np.nan, 3, 2, np.nan])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals(
+            [Categorical(["A", "B"]), Categorical(["B", "B", np.nan])]
+        )
+        exp = Categorical(["A", "B", "B", "B", np.nan])
+        tm.assert_categorical_equal(res, exp)
+
+        val1 = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-03-01"), pd.NaT]
+        val2 = [pd.NaT, pd.Timestamp("2011-01-01"), pd.Timestamp("2011-02-01")]
+
+        res = union_categoricals([Categorical(val1), Categorical(val2)])
+        exp = Categorical(
+            val1 + val2,
+            categories=[
+                pd.Timestamp("2011-01-01"),
+                pd.Timestamp("2011-03-01"),
+                pd.Timestamp("2011-02-01"),
+            ],
+        )
+        tm.assert_categorical_equal(res, exp)
+
+        # all NaN
+        res = union_categoricals(
+            [
+                Categorical(np.array([np.nan, np.nan], dtype=object)),
+                Categorical(["X"]),
+            ]
+        )
+        exp = Categorical([np.nan, np.nan, "X"])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals(
+            [Categorical([np.nan, np.nan]), Categorical([np.nan, np.nan])]
+        )
+        exp = Categorical([np.nan, np.nan, np.nan, np.nan])
+        tm.assert_categorical_equal(res, exp)
+
+    def test_union_categoricals_empty(self):
+        # GH 13759
+        res = union_categoricals([Categorical([]), Categorical([])])
+        exp = Categorical([])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([Categorical([]), Categorical(["1"])])
+        exp = Categorical(["1"])
+        tm.assert_categorical_equal(res, exp)
+
+    def test_union_categorical_same_category(self):
+        # check fastpath
+        c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])
+        c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4])
+        res = union_categoricals([c1, c2])
+        exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], categories=[1, 2, 3, 4])
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical(["z", "z", "z"], categories=["x", "y", "z"])
+        c2 = Categorical(["x", "x", "x"], categories=["x", "y", "z"])
+        res = union_categoricals([c1, c2])
+        exp = Categorical(["z", "z", "z", "x", "x", "x"], categories=["x", "y", "z"])
+        tm.assert_categorical_equal(res, exp)
+
+    def test_union_categorical_same_categories_different_order(self):
+        # https://github.com/pandas-dev/pandas/issues/19096
+        c1 = Categorical(["a", "b", "c"], categories=["a", "b", "c"])
+        c2 = Categorical(["a", "b", "c"], categories=["b", "a", "c"])
+        result = union_categoricals([c1, c2])
+        expected = Categorical(
+            ["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"]
+        )
+        tm.assert_categorical_equal(result, expected)
+
+    def test_union_categoricals_ordered(self):
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([1, 2, 3], ordered=False)
+
+        msg = "Categorical.ordered must be the same"
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([c1, c2])
+
+        res = union_categoricals([c1, c1])
+        exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True)
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3, np.nan], ordered=True)
+        c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
+
+        res = union_categoricals([c1, c2])
+        exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True)
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
+
+        msg = "to union ordered Categoricals, all categories must be the same"
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([c1, c2])
+
+    def test_union_categoricals_ignore_order(self):
+        # GH 15219
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([1, 2, 3], ordered=False)
+
+        res = union_categoricals([c1, c2], ignore_order=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        msg = "Categorical.ordered must be the same"
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([c1, c2], ignore_order=False)
+
+        res = union_categoricals([c1, c1], ignore_order=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([c1, c1], ignore_order=False)
+        exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True)
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3, np.nan], ordered=True)
+        c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
+
+        res = union_categoricals([c1, c2], ignore_order=True)
+        exp = Categorical([1, 2, 3, np.nan, 3, 2])
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
+
+        res = union_categoricals([c1, c2], ignore_order=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        res = union_categoricals([c2, c1], ignore_order=True, sort_categories=True)
+        exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
+        tm.assert_categorical_equal(res, exp)
+
+        c1 = Categorical([1, 2, 3], ordered=True)
+        c2 = Categorical([4, 5, 6], ordered=True)
+        result = union_categoricals([c1, c2], ignore_order=True)
+        expected = Categorical([1, 2, 3, 4, 5, 6])
+        tm.assert_categorical_equal(result, expected)
+
+        msg = "to union ordered Categoricals, all categories must be the same"
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([c1, c2], ignore_order=False)
+
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([c1, c2])
+
+    def test_union_categoricals_sort(self):
+        # GH 13846
+        c1 = Categorical(["x", "y", "z"])
+        c2 = Categorical(["a", "b", "c"])
+        result = union_categoricals([c1, c2], sort_categories=True)
+        expected = Categorical(
+            ["x", "y", "z", "a", "b", "c"], categories=["a", "b", "c", "x", "y", "z"]
+        )
+        tm.assert_categorical_equal(result, expected)
+
+        # fastpath
+        c1 = Categorical(["a", "b"], categories=["b", "a", "c"])
+        c2 = Categorical(["b", "c"], categories=["b", "a", "c"])
+        result = union_categoricals([c1, c2], sort_categories=True)
+        expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical(["a", "b"], categories=["c", "a", "b"])
+        c2 = Categorical(["b", "c"], categories=["c", "a", "b"])
+        result = union_categoricals([c1, c2], sort_categories=True)
+        expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"])
+        tm.assert_categorical_equal(result, expected)
+
+        # fastpath - skip resort
+        c1 = Categorical(["a", "b"], categories=["a", "b", "c"])
+        c2 = Categorical(["b", "c"], categories=["a", "b", "c"])
+        result = union_categoricals([c1, c2], sort_categories=True)
+        expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical(["x", np.nan])
+        c2 = Categorical([np.nan, "b"])
+        result = union_categoricals([c1, c2], sort_categories=True)
+        expected = Categorical(["x", np.nan, np.nan, "b"], categories=["b", "x"])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical([np.nan])
+        c2 = Categorical([np.nan])
+        result = union_categoricals([c1, c2], sort_categories=True)
+        expected = Categorical([np.nan, np.nan])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical([])
+        c2 = Categorical([])
+        result = union_categoricals([c1, c2], sort_categories=True)
+        expected = Categorical([])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True)
+        c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True)
+        msg = "Cannot use sort_categories=True with ordered Categoricals"
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([c1, c2], sort_categories=True)
+
+    def test_union_categoricals_sort_false(self):
+        # GH 13846
+        c1 = Categorical(["x", "y", "z"])
+        c2 = Categorical(["a", "b", "c"])
+        result = union_categoricals([c1, c2], sort_categories=False)
+        expected = Categorical(
+            ["x", "y", "z", "a", "b", "c"], categories=["x", "y", "z", "a", "b", "c"]
+        )
+        tm.assert_categorical_equal(result, expected)
+
+        # fastpath
+        c1 = Categorical(["a", "b"], categories=["b", "a", "c"])
+        c2 = Categorical(["b", "c"], categories=["b", "a", "c"])
+        result = union_categoricals([c1, c2], sort_categories=False)
+        expected = Categorical(["a", "b", "b", "c"], categories=["b", "a", "c"])
+        tm.assert_categorical_equal(result, expected)
+
+        # fastpath - skip resort
+        c1 = Categorical(["a", "b"], categories=["a", "b", "c"])
+        c2 = Categorical(["b", "c"], categories=["a", "b", "c"])
+        result = union_categoricals([c1, c2], sort_categories=False)
+        expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical(["x", np.nan])
+        c2 = Categorical([np.nan, "b"])
+        result = union_categoricals([c1, c2], sort_categories=False)
+        expected = Categorical(["x", np.nan, np.nan, "b"], categories=["x", "b"])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical([np.nan])
+        c2 = Categorical([np.nan])
+        result = union_categoricals([c1, c2], sort_categories=False)
+        expected = Categorical([np.nan, np.nan])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical([])
+        c2 = Categorical([])
+        result = union_categoricals([c1, c2], sort_categories=False)
+        expected = Categorical([])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True)
+        c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True)
+        result = union_categoricals([c1, c2], sort_categories=False)
+        expected = Categorical(
+            ["b", "a", "a", "c"], categories=["b", "a", "c"], ordered=True
+        )
+        tm.assert_categorical_equal(result, expected)
+
+    def test_union_categorical_unwrap(self):
+        # GH 14173
+        c1 = Categorical(["a", "b"])
+        c2 = Series(["b", "c"], dtype="category")
+        result = union_categoricals([c1, c2])
+        expected = Categorical(["a", "b", "b", "c"])
+        tm.assert_categorical_equal(result, expected)
+
+        c2 = CategoricalIndex(c2)
+        result = union_categoricals([c1, c2])
+        tm.assert_categorical_equal(result, expected)
+
+        c1 = Series(c1)
+        result = union_categoricals([c1, c2])
+        tm.assert_categorical_equal(result, expected)
+
+        msg = "all components to combine must be Categorical"
+        with pytest.raises(TypeError, match=msg):
+            union_categoricals([c1, ["a", "b", "c"]])
--- a/dist/client/pandas/tests/reshape/test_util.py
+++ b/dist/client/pandas/tests/reshape/test_util.py
@@ -0,0 +1,80 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    Index,
+    date_range,
+)
+import pandas._testing as tm
+from pandas.core.reshape.util import cartesian_product
+
+
+class TestCartesianProduct:
+    def test_simple(self):
+        x, y = list("ABC"), [1, 22]
+        result1, result2 = cartesian_product([x, y])
+        expected1 = np.array(["A", "A", "B", "B", "C", "C"])
+        expected2 = np.array([1, 22, 1, 22, 1, 22])
+        tm.assert_numpy_array_equal(result1, expected1)
+        tm.assert_numpy_array_equal(result2, expected2)
+
+    def test_datetimeindex(self):
+        # regression test for GitHub issue #6439
+        # make sure that the ordering on datetimeindex is consistent
+        x = date_range("2000-01-01", periods=2)
+        result1, result2 = (Index(y).day for y in cartesian_product([x, x]))
+        expected1 = Index([1, 1, 2, 2])
+        expected2 = Index([1, 2, 1, 2])
+        tm.assert_index_equal(result1, expected1)
+        tm.assert_index_equal(result2, expected2)
+
+    def test_tzaware_retained(self):
+        x = date_range("2000-01-01", periods=2, tz="US/Pacific")
+        y = np.array([3, 4])
+        result1, result2 = cartesian_product([x, y])
+
+        expected = x.repeat(2)
+        tm.assert_index_equal(result1, expected)
+
+    def test_tzaware_retained_categorical(self):
+        x = date_range("2000-01-01", periods=2, tz="US/Pacific").astype("category")
+        y = np.array([3, 4])
+        result1, result2 = cartesian_product([x, y])
+
+        expected = x.repeat(2)
+        tm.assert_index_equal(result1, expected)
+
+    def test_empty(self):
+        # product of empty factors
+        X = [[], [0, 1], []]
+        Y = [[], [], ["a", "b", "c"]]
+        for x, y in zip(X, Y):
+            expected1 = np.array([], dtype=np.asarray(x).dtype)
+            expected2 = np.array([], dtype=np.asarray(y).dtype)
+            result1, result2 = cartesian_product([x, y])
+            tm.assert_numpy_array_equal(result1, expected1)
+            tm.assert_numpy_array_equal(result2, expected2)
+
+        # empty product (empty input):
+        result = cartesian_product([])
+        expected = []
+        assert result == expected
+
+    @pytest.mark.parametrize(
+        "X", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
+    )
+    def test_invalid_input(self, X):
+        msg = "Input must be a list-like of list-likes"
+
+        with pytest.raises(TypeError, match=msg):
+            cartesian_product(X=X)
+
+    def test_exceed_product_space(self):
+        # GH31355: raise useful error when product space is too large
+        msg = "Product space too large to allocate arrays!"
+
+        with pytest.raises(ValueError, match=msg):
+            dims = [np.arange(0, 22, dtype=np.int16) for i in range(12)] + [
+                (np.arange(15128, dtype=np.int16)),
+            ]
+            cartesian_product(X=dims)