552 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			552 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import re
 | 
						|
 | 
						|
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas.errors import PerformanceWarning
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
from pandas import (
 | 
						|
    DataFrame,
 | 
						|
    DatetimeIndex,
 | 
						|
    Index,
 | 
						|
    MultiIndex,
 | 
						|
    Series,
 | 
						|
    Timestamp,
 | 
						|
)
 | 
						|
import pandas._testing as tm
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    "msg,labels,level",
    [
        (r"labels \[4\] not found in level", 4, "a"),
        (r"labels \[7\] not found in level", 7, "b"),
    ],
)
def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level):
    """Dropping a label that is absent from the named level raises KeyError.

    GH 8594. The same check is run against a Series and a DataFrame that
    share one two-level MultiIndex.
    """
    index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"])
    values = [10, 20, 30]

    for obj in (Series(values, index=index), DataFrame(values, index=index)):
        with pytest.raises(KeyError, match=msg):
            obj.drop(labels, level=level)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("labels,level", [(4, "a"), (7, "b")])
def test_drop_errors_ignore(labels, level):
    """With ``errors="ignore"`` a missing level label leaves the object as is.

    GH 8594.
    """
    index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"])
    ser = Series([10, 20, 30], index=index)
    frame = DataFrame([10, 20, 30], index=index)

    # Neither label exists in its level, so nothing may be removed.
    ser_after = ser.drop(labels, level=level, errors="ignore")
    tm.assert_series_equal(ser, ser_after)

    frame_after = frame.drop(labels, level=level, errors="ignore")
    tm.assert_frame_equal(frame, frame_after)
 | 
						|
 | 
						|
 | 
						|
def test_drop_with_non_unique_datetime_index_and_invalid_keys():
    """Dropping labels missing from a non-unique datetime index raises.

    GH 30399.
    """
    # Five hourly timestamps give a unique index to start from.
    hourly = pd.date_range("2012", freq="H", periods=5)
    unique_df = DataFrame(
        np.random.randn(5, 3),
        columns=["a", "b", "c"],
        index=hourly,
    )

    # Selecting position 2 twice makes the index non-unique.
    df = unique_df.iloc[[0, 2, 2, 3]].copy()

    # "a" and "b" are column names here, not labels of the row index.
    with pytest.raises(KeyError, match="not found in axis"):
        df.drop(["a", "b"])
 | 
						|
 | 
						|
 | 
						|
class TestDataFrameDrop:
 | 
						|
    def test_drop_names(self):
 | 
						|
        df = DataFrame(
 | 
						|
            [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
 | 
						|
            index=["a", "b", "c"],
 | 
						|
            columns=["d", "e", "f"],
 | 
						|
        )
 | 
						|
        df.index.name, df.columns.name = "first", "second"
 | 
						|
        df_dropped_b = df.drop("b")
 | 
						|
        df_dropped_e = df.drop("e", axis=1)
 | 
						|
        df_inplace_b, df_inplace_e = df.copy(), df.copy()
 | 
						|
        return_value = df_inplace_b.drop("b", inplace=True)
 | 
						|
        assert return_value is None
 | 
						|
        return_value = df_inplace_e.drop("e", axis=1, inplace=True)
 | 
						|
        assert return_value is None
 | 
						|
        for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e):
 | 
						|
            assert obj.index.name == "first"
 | 
						|
            assert obj.columns.name == "second"
 | 
						|
        assert list(df.columns) == ["d", "e", "f"]
 | 
						|
 | 
						|
        msg = r"\['g'\] not found in axis"
 | 
						|
        with pytest.raises(KeyError, match=msg):
 | 
						|
            df.drop(["g"])
 | 
						|
        with pytest.raises(KeyError, match=msg):
 | 
						|
            df.drop(["g"], axis=1)
 | 
						|
 | 
						|
        # errors = 'ignore'
 | 
						|
        dropped = df.drop(["g"], errors="ignore")
 | 
						|
        expected = Index(["a", "b", "c"], name="first")
 | 
						|
        tm.assert_index_equal(dropped.index, expected)
 | 
						|
 | 
						|
        dropped = df.drop(["b", "g"], errors="ignore")
 | 
						|
        expected = Index(["a", "c"], name="first")
 | 
						|
        tm.assert_index_equal(dropped.index, expected)
 | 
						|
 | 
						|
        dropped = df.drop(["g"], axis=1, errors="ignore")
 | 
						|
        expected = Index(["d", "e", "f"], name="second")
 | 
						|
        tm.assert_index_equal(dropped.columns, expected)
 | 
						|
 | 
						|
        dropped = df.drop(["d", "g"], axis=1, errors="ignore")
 | 
						|
        expected = Index(["e", "f"], name="second")
 | 
						|
        tm.assert_index_equal(dropped.columns, expected)
 | 
						|
 | 
						|
        # GH 16398
 | 
						|
        dropped = df.drop([], errors="ignore")
 | 
						|
        expected = Index(["a", "b", "c"], name="first")
 | 
						|
        tm.assert_index_equal(dropped.index, expected)
 | 
						|
 | 
						|
    def test_drop(self):
 | 
						|
        simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]})
 | 
						|
        tm.assert_frame_equal(simple.drop("A", axis=1), simple[["B"]])
 | 
						|
        tm.assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]])
 | 
						|
        tm.assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :])
 | 
						|
        tm.assert_frame_equal(simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :])
 | 
						|
 | 
						|
        with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
 | 
						|
            simple.drop(5)
 | 
						|
        with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
 | 
						|
            simple.drop("C", axis=1)
 | 
						|
        with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
 | 
						|
            simple.drop([1, 5])
 | 
						|
        with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
 | 
						|
            simple.drop(["A", "C"], axis=1)
 | 
						|
 | 
						|
        # GH 42881
 | 
						|
        with pytest.raises(KeyError, match=r"\['C', 'D', 'F'\] not found in axis"):
 | 
						|
            simple.drop(["C", "D", "F"], axis=1)
 | 
						|
 | 
						|
        # errors = 'ignore'
 | 
						|
        tm.assert_frame_equal(simple.drop(5, errors="ignore"), simple)
 | 
						|
        tm.assert_frame_equal(
 | 
						|
            simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :]
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple)
 | 
						|
        tm.assert_frame_equal(
 | 
						|
            simple.drop(["A", "C"], axis=1, errors="ignore"), simple[["B"]]
 | 
						|
        )
 | 
						|
 | 
						|
        # non-unique - wheee!
 | 
						|
        nu_df = DataFrame(
 | 
						|
            list(zip(range(3), range(-3, 1), list("abc"))), columns=["a", "a", "b"]
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]])
 | 
						|
        tm.assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"])
 | 
						|
        tm.assert_frame_equal(nu_df.drop([]), nu_df)  # GH 16398
 | 
						|
 | 
						|
        nu_df = nu_df.set_index(Index(["X", "Y", "X"]))
 | 
						|
        nu_df.columns = list("abc")
 | 
						|
        tm.assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.loc[["Y"], :])
 | 
						|
        tm.assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.loc[[], :])
 | 
						|
 | 
						|
        # inplace cache issue
 | 
						|
        # GH#5628
 | 
						|
        df = DataFrame(np.random.randn(10, 3), columns=list("abc"))
 | 
						|
        expected = df[~(df.b > 0)]
 | 
						|
        return_value = df.drop(labels=df[df.b > 0].index, inplace=True)
 | 
						|
        assert return_value is None
 | 
						|
        tm.assert_frame_equal(df, expected)
 | 
						|
 | 
						|
    def test_drop_multiindex_not_lexsorted(self):
 | 
						|
        # GH#11640
 | 
						|
 | 
						|
        # define the lexsorted version
 | 
						|
        lexsorted_mi = MultiIndex.from_tuples(
 | 
						|
            [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"]
 | 
						|
        )
 | 
						|
        lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
 | 
						|
        assert lexsorted_df.columns._is_lexsorted()
 | 
						|
 | 
						|
        # define the non-lexsorted version
 | 
						|
        not_lexsorted_df = DataFrame(
 | 
						|
            columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
 | 
						|
        )
 | 
						|
        not_lexsorted_df = not_lexsorted_df.pivot_table(
 | 
						|
            index="a", columns=["b", "c"], values="d"
 | 
						|
        )
 | 
						|
        not_lexsorted_df = not_lexsorted_df.reset_index()
 | 
						|
        assert not not_lexsorted_df.columns._is_lexsorted()
 | 
						|
 | 
						|
        # compare the results
 | 
						|
        tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
 | 
						|
 | 
						|
        expected = lexsorted_df.drop("a", axis=1)
 | 
						|
        with tm.assert_produces_warning(PerformanceWarning):
 | 
						|
            result = not_lexsorted_df.drop("a", axis=1)
 | 
						|
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_api_equivalence(self):
 | 
						|
        # equivalence of the labels/axis and index/columns API's (GH#12392)
 | 
						|
        df = DataFrame(
 | 
						|
            [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
 | 
						|
            index=["a", "b", "c"],
 | 
						|
            columns=["d", "e", "f"],
 | 
						|
        )
 | 
						|
 | 
						|
        res1 = df.drop("a")
 | 
						|
        res2 = df.drop(index="a")
 | 
						|
        tm.assert_frame_equal(res1, res2)
 | 
						|
 | 
						|
        res1 = df.drop("d", axis=1)
 | 
						|
        res2 = df.drop(columns="d")
 | 
						|
        tm.assert_frame_equal(res1, res2)
 | 
						|
 | 
						|
        res1 = df.drop(labels="e", axis=1)
 | 
						|
        res2 = df.drop(columns="e")
 | 
						|
        tm.assert_frame_equal(res1, res2)
 | 
						|
 | 
						|
        res1 = df.drop(["a"], axis=0)
 | 
						|
        res2 = df.drop(index=["a"])
 | 
						|
        tm.assert_frame_equal(res1, res2)
 | 
						|
 | 
						|
        res1 = df.drop(["a"], axis=0).drop(["d"], axis=1)
 | 
						|
        res2 = df.drop(index=["a"], columns=["d"])
 | 
						|
        tm.assert_frame_equal(res1, res2)
 | 
						|
 | 
						|
        msg = "Cannot specify both 'labels' and 'index'/'columns'"
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            df.drop(labels="a", index="b")
 | 
						|
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            df.drop(labels="a", columns="b")
 | 
						|
 | 
						|
        msg = "Need to specify at least one of 'labels', 'index' or 'columns'"
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            df.drop(axis=1)
 | 
						|
 | 
						|
    data = [[1, 2, 3], [1, 2, 3]]
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "actual",
 | 
						|
        [
 | 
						|
            DataFrame(data=data, index=["a", "a"]),
 | 
						|
            DataFrame(data=data, index=["a", "b"]),
 | 
						|
            DataFrame(data=data, index=["a", "b"]).set_index([0, 1]),
 | 
						|
            DataFrame(data=data, index=["a", "a"]).set_index([0, 1]),
 | 
						|
        ],
 | 
						|
    )
 | 
						|
    def test_raise_on_drop_duplicate_index(self, actual):
 | 
						|
 | 
						|
        # GH#19186
 | 
						|
        level = 0 if isinstance(actual.index, MultiIndex) else None
 | 
						|
        msg = re.escape("\"['c'] not found in axis\"")
 | 
						|
        with pytest.raises(KeyError, match=msg):
 | 
						|
            actual.drop("c", level=level, axis=0)
 | 
						|
        with pytest.raises(KeyError, match=msg):
 | 
						|
            actual.T.drop("c", level=level, axis=1)
 | 
						|
        expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore")
 | 
						|
        tm.assert_frame_equal(expected_no_err, actual)
 | 
						|
        expected_no_err = actual.T.drop("c", axis=1, level=level, errors="ignore")
 | 
						|
        tm.assert_frame_equal(expected_no_err.T, actual)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]])
 | 
						|
    @pytest.mark.parametrize("drop_labels", [[], [1], [2]])
 | 
						|
    def test_drop_empty_list(self, index, drop_labels):
 | 
						|
        # GH#21494
 | 
						|
        expected_index = [i for i in index if i not in drop_labels]
 | 
						|
        frame = DataFrame(index=index).drop(drop_labels)
 | 
						|
        tm.assert_frame_equal(frame, DataFrame(index=expected_index))
 | 
						|
 | 
						|
    @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]])
 | 
						|
    @pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]])
 | 
						|
    def test_drop_non_empty_list(self, index, drop_labels):
 | 
						|
        # GH# 21494
 | 
						|
        with pytest.raises(KeyError, match="not found in axis"):
 | 
						|
            DataFrame(index=index).drop(drop_labels)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "empty_listlike",
 | 
						|
        [
 | 
						|
            [],
 | 
						|
            {},
 | 
						|
            np.array([]),
 | 
						|
            Series([], dtype="datetime64[ns]"),
 | 
						|
            Index([]),
 | 
						|
            DatetimeIndex([]),
 | 
						|
        ],
 | 
						|
    )
 | 
						|
    def test_drop_empty_listlike_non_unique_datetime_index(self, empty_listlike):
 | 
						|
        # GH#27994
 | 
						|
        data = {"column_a": [5, 10], "column_b": ["one", "two"]}
 | 
						|
        index = [Timestamp("2021-01-01"), Timestamp("2021-01-01")]
 | 
						|
        df = DataFrame(data, index=index)
 | 
						|
 | 
						|
        # Passing empty list-like should return the same DataFrame.
 | 
						|
        expected = df.copy()
 | 
						|
        result = df.drop(empty_listlike)
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_mixed_depth_drop(self):
 | 
						|
        arrays = [
 | 
						|
            ["a", "top", "top", "routine1", "routine1", "routine2"],
 | 
						|
            ["", "OD", "OD", "result1", "result2", "result1"],
 | 
						|
            ["", "wx", "wy", "", "", ""],
 | 
						|
        ]
 | 
						|
 | 
						|
        tuples = sorted(zip(*arrays))
 | 
						|
        index = MultiIndex.from_tuples(tuples)
 | 
						|
        df = DataFrame(np.random.randn(4, 6), columns=index)
 | 
						|
 | 
						|
        result = df.drop("a", axis=1)
 | 
						|
        expected = df.drop([("a", "", "")], axis=1)
 | 
						|
        tm.assert_frame_equal(expected, result)
 | 
						|
 | 
						|
        result = df.drop(["top"], axis=1)
 | 
						|
        expected = df.drop([("top", "OD", "wx")], axis=1)
 | 
						|
        expected = expected.drop([("top", "OD", "wy")], axis=1)
 | 
						|
        tm.assert_frame_equal(expected, result)
 | 
						|
 | 
						|
        result = df.drop(("top", "OD", "wx"), axis=1)
 | 
						|
        expected = df.drop([("top", "OD", "wx")], axis=1)
 | 
						|
        tm.assert_frame_equal(expected, result)
 | 
						|
 | 
						|
        expected = df.drop([("top", "OD", "wy")], axis=1)
 | 
						|
        expected = df.drop("top", axis=1)
 | 
						|
 | 
						|
        result = df.drop("result1", level=1, axis=1)
 | 
						|
        expected = df.drop(
 | 
						|
            [("routine1", "result1", ""), ("routine2", "result1", "")], axis=1
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(expected, result)
 | 
						|
 | 
						|
    def test_drop_multiindex_other_level_nan(self):
 | 
						|
        # GH#12754
 | 
						|
        df = (
 | 
						|
            DataFrame(
 | 
						|
                {
 | 
						|
                    "A": ["one", "one", "two", "two"],
 | 
						|
                    "B": [np.nan, 0.0, 1.0, 2.0],
 | 
						|
                    "C": ["a", "b", "c", "c"],
 | 
						|
                    "D": [1, 2, 3, 4],
 | 
						|
                }
 | 
						|
            )
 | 
						|
            .set_index(["A", "B", "C"])
 | 
						|
            .sort_index()
 | 
						|
        )
 | 
						|
        result = df.drop("c", level="C")
 | 
						|
        expected = DataFrame(
 | 
						|
            [2, 1],
 | 
						|
            columns=["D"],
 | 
						|
            index=MultiIndex.from_tuples(
 | 
						|
                [("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"]
 | 
						|
            ),
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_nonunique(self):
 | 
						|
        df = DataFrame(
 | 
						|
            [
 | 
						|
                ["x-a", "x", "a", 1.5],
 | 
						|
                ["x-a", "x", "a", 1.2],
 | 
						|
                ["z-c", "z", "c", 3.1],
 | 
						|
                ["x-a", "x", "a", 4.1],
 | 
						|
                ["x-b", "x", "b", 5.1],
 | 
						|
                ["x-b", "x", "b", 4.1],
 | 
						|
                ["x-b", "x", "b", 2.2],
 | 
						|
                ["y-a", "y", "a", 1.2],
 | 
						|
                ["z-b", "z", "b", 2.1],
 | 
						|
            ],
 | 
						|
            columns=["var1", "var2", "var3", "var4"],
 | 
						|
        )
 | 
						|
 | 
						|
        grp_size = df.groupby("var1").size()
 | 
						|
        drop_idx = grp_size.loc[grp_size == 1]
 | 
						|
 | 
						|
        idf = df.set_index(["var1", "var2", "var3"])
 | 
						|
 | 
						|
        # it works! GH#2101
 | 
						|
        result = idf.drop(drop_idx.index, level=0).reset_index()
 | 
						|
        expected = df[-df.var1.isin(drop_idx.index)]
 | 
						|
 | 
						|
        result.index = expected.index
 | 
						|
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_level(self, multiindex_dataframe_random_data):
 | 
						|
        frame = multiindex_dataframe_random_data
 | 
						|
 | 
						|
        result = frame.drop(["bar", "qux"], level="first")
 | 
						|
        expected = frame.iloc[[0, 1, 2, 5, 6]]
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        result = frame.drop(["two"], level="second")
 | 
						|
        expected = frame.iloc[[0, 2, 3, 6, 7, 9]]
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        result = frame.T.drop(["bar", "qux"], axis=1, level="first")
 | 
						|
        expected = frame.iloc[[0, 1, 2, 5, 6]].T
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
        result = frame.T.drop(["two"], axis=1, level="second")
 | 
						|
        expected = frame.iloc[[0, 2, 3, 6, 7, 9]].T
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_level_nonunique_datetime(self):
 | 
						|
        # GH#12701
 | 
						|
        idx = Index([2, 3, 4, 4, 5], name="id")
 | 
						|
        idxdt = pd.to_datetime(
 | 
						|
            [
 | 
						|
                "201603231400",
 | 
						|
                "201603231500",
 | 
						|
                "201603231600",
 | 
						|
                "201603231600",
 | 
						|
                "201603231700",
 | 
						|
            ]
 | 
						|
        )
 | 
						|
        df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
 | 
						|
        df["tstamp"] = idxdt
 | 
						|
        df = df.set_index("tstamp", append=True)
 | 
						|
        ts = Timestamp("201603231600")
 | 
						|
        assert df.index.is_unique is False
 | 
						|
 | 
						|
        result = df.drop(ts, level="tstamp")
 | 
						|
        expected = df.loc[idx != 4]
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("box", [Series, DataFrame])
 | 
						|
    def test_drop_tz_aware_timestamp_across_dst(self, box):
 | 
						|
        # GH#21761
 | 
						|
        start = Timestamp("2017-10-29", tz="Europe/Berlin")
 | 
						|
        end = Timestamp("2017-10-29 04:00:00", tz="Europe/Berlin")
 | 
						|
        index = pd.date_range(start, end, freq="15min")
 | 
						|
        data = box(data=[1] * len(index), index=index)
 | 
						|
        result = data.drop(start)
 | 
						|
        expected_start = Timestamp("2017-10-29 00:15:00", tz="Europe/Berlin")
 | 
						|
        expected_idx = pd.date_range(expected_start, end, freq="15min")
 | 
						|
        expected = box(data=[1] * len(expected_idx), index=expected_idx)
 | 
						|
        tm.assert_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_preserve_names(self):
 | 
						|
        index = MultiIndex.from_arrays(
 | 
						|
            [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"]
 | 
						|
        )
 | 
						|
 | 
						|
        df = DataFrame(np.random.randn(6, 3), index=index)
 | 
						|
 | 
						|
        result = df.drop([(0, 2)])
 | 
						|
        assert result.index.names == ("one", "two")
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "operation", ["__iadd__", "__isub__", "__imul__", "__ipow__"]
 | 
						|
    )
 | 
						|
    @pytest.mark.parametrize("inplace", [False, True])
 | 
						|
    def test_inplace_drop_and_operation(self, operation, inplace):
 | 
						|
        # GH#30484
 | 
						|
        df = DataFrame({"x": range(5)})
 | 
						|
        expected = df.copy()
 | 
						|
        df["y"] = range(5)
 | 
						|
        y = df["y"]
 | 
						|
 | 
						|
        with tm.assert_produces_warning(None):
 | 
						|
            if inplace:
 | 
						|
                df.drop("y", axis=1, inplace=inplace)
 | 
						|
            else:
 | 
						|
                df = df.drop("y", axis=1, inplace=inplace)
 | 
						|
 | 
						|
            # Perform operation and check result
 | 
						|
            getattr(y, operation)(1)
 | 
						|
            tm.assert_frame_equal(df, expected)
 | 
						|
 | 
						|
    def test_drop_with_non_unique_multiindex(self):
 | 
						|
        # GH#36293
 | 
						|
        mi = MultiIndex.from_arrays([["x", "y", "x"], ["i", "j", "i"]])
 | 
						|
        df = DataFrame([1, 2, 3], index=mi)
 | 
						|
        result = df.drop(index="x")
 | 
						|
        expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]]))
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("indexer", [("a", "a"), [("a", "a")]])
 | 
						|
    def test_drop_tuple_with_non_unique_multiindex(self, indexer):
 | 
						|
        # GH#42771
 | 
						|
        idx = MultiIndex.from_product([["a", "b"], ["a", "a"]])
 | 
						|
        df = DataFrame({"x": range(len(idx))}, index=idx)
 | 
						|
        result = df.drop(index=[("a", "a")])
 | 
						|
        expected = DataFrame(
 | 
						|
            {"x": [2, 3]}, index=MultiIndex.from_tuples([("b", "a"), ("b", "a")])
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_with_duplicate_columns(self):
 | 
						|
        df = DataFrame(
 | 
						|
            [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"]
 | 
						|
        )
 | 
						|
        result = df.drop(["a"], axis=1)
 | 
						|
        expected = DataFrame([[1], [1], [1]], columns=["bar"])
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
        result = df.drop("a", axis=1)
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_with_duplicate_columns2(self):
 | 
						|
        # drop buggy GH#6240
 | 
						|
        df = DataFrame(
 | 
						|
            {
 | 
						|
                "A": np.random.randn(5),
 | 
						|
                "B": np.random.randn(5),
 | 
						|
                "C": np.random.randn(5),
 | 
						|
                "D": ["a", "b", "c", "d", "e"],
 | 
						|
            }
 | 
						|
        )
 | 
						|
 | 
						|
        expected = df.take([0, 1, 1], axis=1)
 | 
						|
        df2 = df.take([2, 0, 1, 2, 1], axis=1)
 | 
						|
        result = df2.drop("C", axis=1)
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_pos_args_deprecation(self):
 | 
						|
        # https://github.com/pandas-dev/pandas/issues/41485
 | 
						|
        df = DataFrame({"a": [1, 2, 3]})
 | 
						|
        msg = (
 | 
						|
            r"In a future version of pandas all arguments of DataFrame\.drop "
 | 
						|
            r"except for the argument 'labels' will be keyword-only"
 | 
						|
        )
 | 
						|
        with tm.assert_produces_warning(FutureWarning, match=msg):
 | 
						|
            result = df.drop("a", 1)
 | 
						|
        expected = DataFrame(index=[0, 1, 2])
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_drop_inplace_no_leftover_column_reference(self):
 | 
						|
        # GH 13934
 | 
						|
        df = DataFrame({"a": [1, 2, 3]})
 | 
						|
        a = df.a
 | 
						|
        df.drop(["a"], axis=1, inplace=True)
 | 
						|
        tm.assert_index_equal(df.columns, Index([], dtype="object"))
 | 
						|
        a -= a.mean()
 | 
						|
        tm.assert_index_equal(df.columns, Index([], dtype="object"))
 | 
						|
 | 
						|
    def test_drop_level_missing_label_multiindex(self):
 | 
						|
        # GH 18561
 | 
						|
        df = DataFrame(index=MultiIndex.from_product([range(3), range(3)]))
 | 
						|
        with pytest.raises(KeyError, match="labels \\[5\\] not found in level"):
 | 
						|
            df.drop(5, level=0)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("idx, level", [(["a", "b"], 0), (["a"], None)])
 | 
						|
    def test_drop_index_ea_dtype(self, any_numeric_ea_dtype, idx, level):
 | 
						|
        # GH#45860
 | 
						|
        df = DataFrame(
 | 
						|
            {"a": [1, 2, 2, pd.NA], "b": 100}, dtype=any_numeric_ea_dtype
 | 
						|
        ).set_index(idx)
 | 
						|
        result = df.drop(Index([2, pd.NA]), level=level)
 | 
						|
        expected = DataFrame(
 | 
						|
            {"a": [1], "b": 100}, dtype=any_numeric_ea_dtype
 | 
						|
        ).set_index(idx)
 | 
						|
        tm.assert_frame_equal(result, expected)
 |