import numpy as np
import pytest

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    Index,
    Series,
)
import pandas._testing as tm

dt_data = [
    pd.Timestamp("2011-01-01"),
    pd.Timestamp("2011-01-02"),
    pd.Timestamp("2011-01-03"),
]
tz_data = [
    pd.Timestamp("2011-01-01", tz="US/Eastern"),
    pd.Timestamp("2011-01-02", tz="US/Eastern"),
    pd.Timestamp("2011-01-03", tz="US/Eastern"),
]
td_data = [
    pd.Timedelta("1 days"),
    pd.Timedelta("2 days"),
    pd.Timedelta("3 days"),
]
period_data = [
    pd.Period("2011-01", freq="M"),
    pd.Period("2011-02", freq="M"),
    pd.Period("2011-03", freq="M"),
]
data_dict = {
    "bool": [True, False, True],
    "int64": [1, 2, 3],
    "float64": [1.1, np.nan, 3.3],
    "category": Categorical(["X", "Y", "Z"]),
    "object": ["a", "b", "c"],
    "datetime64[ns]": dt_data,
    "datetime64[ns, US/Eastern]": tz_data,
    "timedelta64[ns]": td_data,
    "period[M]": period_data,
}
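
# ``data_dict`` maps each dtype label to sample values of that dtype; the
# parametrized ``item`` fixture below draws from it, so tests requesting
# ``item`` run once per dtype (and once per dtype pair when ``item2`` is
# also requested).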


class TestConcatAppendCommon:
    """
    Test common dtype coercion rules between concat and append.
    """

    @pytest.fixture(params=sorted(data_dict.keys()))
    def item(self, request):
        key = request.param
        return key, data_dict[key]

    item2 = item
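    # Assigning the fixture to a second attribute registers it again under the
    # name "item2"; pytest parametrizes ``item`` and ``item2`` independently,
    # so tests requesting both run over every pair of dtypes.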

    def _check_expected_dtype(self, obj, label):
        """
        Check whether obj has the expected dtype for the given label,
        allowing for dtypes that Index/Series do not support directly.
        """
        if isinstance(obj, Index):
            if label == "bool":
                assert obj.dtype == "object"
            else:
                assert obj.dtype == label
        elif isinstance(obj, Series):
            if label.startswith("period"):
                assert obj.dtype == "Period[M]"
            else:
                assert obj.dtype == label
        else:
            raise ValueError

    def test_dtypes(self, item):
        # to confirm test case covers intended dtypes
        typ, vals = item
        self._check_expected_dtype(Index(vals), typ)
        self._check_expected_dtype(Series(vals), typ)

    def test_concatlike_same_dtypes(self, item):
        # GH 13660
        typ1, vals1 = item

        vals2 = vals1
        vals3 = vals1

        if typ1 == "category":
            exp_data = Categorical(list(vals1) + list(vals2))
            exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
        else:
            exp_data = vals1 + vals2
            exp_data3 = vals1 + vals2 + vals3

        # ----- Index ----- #

        # index.append
        res = Index(vals1).append(Index(vals2))
        exp = Index(exp_data)
        tm.assert_index_equal(res, exp)

        # 3 elements
        res = Index(vals1).append([Index(vals2), Index(vals3)])
        exp = Index(exp_data3)
        tm.assert_index_equal(res, exp)

        # index.append name mismatch
        i1 = Index(vals1, name="x")
        i2 = Index(vals2, name="y")
        res = i1.append(i2)
        exp = Index(exp_data)
        tm.assert_index_equal(res, exp)

        # index.append name match
        i1 = Index(vals1, name="x")
        i2 = Index(vals2, name="x")
        res = i1.append(i2)
        exp = Index(exp_data, name="x")
        tm.assert_index_equal(res, exp)

        # cannot append non-index
        with pytest.raises(TypeError, match="all inputs must be Index"):
            Index(vals1).append(vals2)

        with pytest.raises(TypeError, match="all inputs must be Index"):
            Index(vals1).append([Index(vals2), vals3])

        # ----- Series ----- #

        # series._append
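        # (``_append`` is the private replacement for the deprecated public
        #  ``Series.append``; the tests below exercise it alongside pd.concat)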
        res = Series(vals1)._append(Series(vals2), ignore_index=True)
        exp = Series(exp_data)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # concat
        res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # 3 elements
        res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
        exp = Series(exp_data3)
        tm.assert_series_equal(res, exp)

        res = pd.concat(
            [Series(vals1), Series(vals2), Series(vals3)],
            ignore_index=True,
        )
        tm.assert_series_equal(res, exp)

        # name mismatch
        s1 = Series(vals1, name="x")
        s2 = Series(vals2, name="y")
        res = s1._append(s2, ignore_index=True)
        exp = Series(exp_data)
        tm.assert_series_equal(res, exp, check_index_type=True)

        res = pd.concat([s1, s2], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # name match
        s1 = Series(vals1, name="x")
        s2 = Series(vals2, name="x")
        res = s1._append(s2, ignore_index=True)
        exp = Series(exp_data, name="x")
        tm.assert_series_equal(res, exp, check_index_type=True)

        res = pd.concat([s1, s2], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # cannot append non-Series/DataFrame objects
        msg = (
            r"cannot concatenate object of type '.+'; "
            "only Series and DataFrame objs are valid"
        )
        with pytest.raises(TypeError, match=msg):
            Series(vals1)._append(vals2)

        with pytest.raises(TypeError, match=msg):
            Series(vals1)._append([Series(vals2), vals3])

        with pytest.raises(TypeError, match=msg):
            pd.concat([Series(vals1), vals2])

        with pytest.raises(TypeError, match=msg):
            pd.concat([Series(vals1), Series(vals2), vals3])

    def test_concatlike_dtypes_coercion(self, item, item2):
        # GH 13660
        typ1, vals1 = item
        typ2, vals2 = item2

        vals3 = vals2

        # basically infer
        exp_index_dtype = None
        exp_series_dtype = None

        if typ1 == typ2:
            # same dtype is tested in test_concatlike_same_dtypes
            return
        elif typ1 == "category" or typ2 == "category":
            # The `vals1 + vals2` below fails because one of these is a
            #  Categorical instead of a list; we have separate dedicated
            #  tests for categorical
            return

        warn = None
        # specify expected dtype
        if typ1 == "bool" and typ2 in ("int64", "float64"):
            # series coerces to numeric based on numpy rule
            # index doesn't because bool is object dtype
            exp_series_dtype = typ2
            warn = FutureWarning
        elif typ2 == "bool" and typ1 in ("int64", "float64"):
            exp_series_dtype = typ1
            warn = FutureWarning
        elif (
            typ1 == "datetime64[ns, US/Eastern]"
            or typ2 == "datetime64[ns, US/Eastern]"
            or typ1 == "timedelta64[ns]"
            or typ2 == "timedelta64[ns]"
        ):
            exp_index_dtype = object
            exp_series_dtype = object

        exp_data = vals1 + vals2
        exp_data3 = vals1 + vals2 + vals3

        # ----- Index ----- #

        # index.append
        res = Index(vals1).append(Index(vals2))
        exp = Index(exp_data, dtype=exp_index_dtype)
        tm.assert_index_equal(res, exp)

        # 3 elements
        res = Index(vals1).append([Index(vals2), Index(vals3)])
        exp = Index(exp_data3, dtype=exp_index_dtype)
        tm.assert_index_equal(res, exp)

        # ----- Series ----- #

        # series._append
        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
            # GH#39817
            res = Series(vals1)._append(Series(vals2), ignore_index=True)
        exp = Series(exp_data, dtype=exp_series_dtype)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # concat
        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
            # GH#39817
            res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # 3 elements
        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
            # GH#39817
            res = Series(vals1)._append(
                [Series(vals2), Series(vals3)], ignore_index=True
            )
        exp = Series(exp_data3, dtype=exp_series_dtype)
        tm.assert_series_equal(res, exp)

        with tm.assert_produces_warning(warn, match="concatenating bool-dtype"):
            # GH#39817
            res = pd.concat(
                [Series(vals1), Series(vals2), Series(vals3)],
                ignore_index=True,
            )
        tm.assert_series_equal(res, exp)

    def test_concatlike_common_coerce_to_pandas_object(self):
        # GH 13626
        # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
        dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])

        exp = Index(
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ]
        )

        res = dti.append(tdi)
        tm.assert_index_equal(res, exp)
        assert isinstance(res[0], pd.Timestamp)
        assert isinstance(res[-1], pd.Timedelta)

        dts = Series(dti)
        tds = Series(tdi)
        res = dts._append(tds)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

        res = pd.concat([dts, tds])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

    def test_concatlike_datetimetz(self, tz_aware_fixture):
        tz = tz_aware_fixture
        # GH 7795
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)

        exp = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts2 = Series(dti2)
        res = dts1._append(dts2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
    def test_concatlike_datetimetz_short(self, tz):
        # GH#7795
        ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
        ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
        df1 = DataFrame(0, index=ix1, columns=["A", "B"])
        df2 = DataFrame(0, index=ix2, columns=["A", "B"])

        exp_idx = pd.DatetimeIndex(
            ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
            tz=tz,
        )
        exp = DataFrame(0, index=exp_idx, columns=["A", "B"])

        tm.assert_frame_equal(df1._append(df2), exp)
        tm.assert_frame_equal(pd.concat([df1, df2]), exp)

    def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
        tz = tz_aware_fixture
        # GH 13660

        # different tz coerces to object
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])

        exp = Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01"),
                pd.Timestamp("2012-01-02"),
            ],
            dtype=object,
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts2 = Series(dti2)
        res = dts1._append(dts2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        # different tz
        dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")

        exp = Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01", tz="US/Pacific"),
                pd.Timestamp("2012-01-02", tz="US/Pacific"),
            ],
            dtype=object,
        )

        res = dti1.append(dti3)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts3 = Series(dti3)
        res = dts1._append(dts3)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts3])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period(self):
        # GH 13660
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")

        exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        ps2 = Series(pi2)
        res = ps1._append(ps2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_diff_freq_to_object(self):
        # GH 13221
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")

        exp = Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Period("2012-01-01", freq="D"),
                pd.Period("2012-02-01", freq="D"),
            ],
            dtype=object,
        )

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        ps2 = Series(pi2)
        res = ps1._append(ps2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_mixed_dt_to_object(self):
        # GH 13221
        # different datetimelike
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
        exp = Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ],
            dtype=object,
        )

        res = pi1.append(tdi)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        tds = Series(tdi)
        res = ps1._append(tds)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, tds])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        # inverse
        exp = Index(
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
            ],
            dtype=object,
        )

        res = tdi.append(pi1)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        tds = Series(tdi)
        res = tds._append(ps1)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([tds, ps1])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concat_categorical(self):
        # GH 13524

        # same categories -> category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2], dtype="category")

        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # partially different categories => not-category
        s1 = Series([3, 2], dtype="category")
        s2 = Series([2, 1], dtype="category")

        exp = Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # completely different categories (same dtype) => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series([np.nan, 1, 3, 2], dtype="category")

        exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

    def test_union_categorical_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19096
        a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
        b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
        result = pd.concat([a, b], ignore_index=True)
        expected = Series(
            Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
        )
        tm.assert_series_equal(result, expected)

    def test_concat_categorical_coercion(self):
        # GH 13524

        # category + not-category => not-category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2])

        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # result shouldn't be affected by 1st elem dtype
        exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object")
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # all values are not in category => not-category
        s1 = Series([3, 2], dtype="category")
        s2 = Series([2, 1])

        exp = Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series([2, 1, 3, 2])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # completely different categories => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series([1, 3, 2])

        exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object")
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # different dtype => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series(["a", "b", "c"])

        exp = Series([10, 11, np.nan, "a", "b", "c"])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series(["a", "b", "c", 10, 11, np.nan])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # if normal series only contains NaN-likes => not-category
        s1 = Series([10, 11], dtype="category")
        s2 = Series([np.nan, np.nan, np.nan])

        exp = Series([10, 11, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series([np.nan, np.nan, np.nan, 10, 11])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

    def test_concat_categorical_3elem_coercion(self):
        # GH 13524

        # mixed dtypes => not-category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2], dtype="category")
        s3 = Series([1, 2, 1, 2, np.nan])

        exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)

        exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = Series([4, 5, 6], dtype="category")
        s2 = Series([1, 2, 3], dtype="category")
        s3 = Series([1, 3, 4])

        exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)

        exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = Series([4, 5, 6], dtype="category")
        s2 = Series([1, 2, 3], dtype="category")
        s3 = Series([10, 11, 12])

        exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)

        exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)

    def test_concat_categorical_multi_coercion(self):
        # GH 13524

        s1 = Series([1, 3], dtype="category")
        s2 = Series([3, 4], dtype="category")
        s3 = Series([2, 3])
        s4 = Series([2, 2], dtype="category")
        s5 = Series([1, np.nan])
        s6 = Series([1, 3, 2], dtype="category")

        # mixed dtype, values are all in categories => not-category
        exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
        res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s1._append([s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)

        exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
        res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s6._append([s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)

    def test_concat_categorical_ordered(self):
        # GH 13524

        s1 = Series(Categorical([1, 2, np.nan], ordered=True))
        s2 = Series(Categorical([2, 1, 2], ordered=True))

        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True))
        tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp)

    def test_concat_categorical_coercion_nan(self):
        # GH 13524

        # some edge cases
        # category + not-category => not-category
        s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
        s2 = Series([np.nan, 1])

        exp = Series([np.nan, np.nan, np.nan, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        s1 = Series([1, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan])

        exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # mixed dtype, all nan-likes => not-category
        s1 = Series([np.nan, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan])

        exp = Series([np.nan, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # all category nan-likes => category
        s1 = Series([np.nan, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan], dtype="category")

        exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

    def test_concat_categorical_empty(self):
        # GH 13524

        s1 = Series([], dtype="category")
        s2 = Series([1, 2], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([], dtype="object")

        # different dtype => not-category
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([np.nan, np.nan])

        # empty Series is ignored
        exp = Series([np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

    def test_categorical_concat_append(self):
        cat = Categorical(["a", "b"], categories=["a", "b"])
        vals = [1, 2]
        df = DataFrame({"cats": cat, "vals": vals})
        cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
        vals2 = [1, 2, 1, 2]
        exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))

        tm.assert_frame_equal(pd.concat([df, df]), exp)
        tm.assert_frame_equal(df._append(df), exp)

        # GH 13524 can concat different categories
        cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
        vals3 = [1, 2]
        df_different_categories = DataFrame({"cats": cat3, "vals": vals3})

        res = pd.concat([df, df_different_categories], ignore_index=True)
        exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
        tm.assert_frame_equal(res, exp)

        res = df._append(df_different_categories, ignore_index=True)
        tm.assert_frame_equal(res, exp)