429 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			429 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas.core.dtypes.dtypes import (
 | 
						|
    DatetimeTZDtype,
 | 
						|
    IntervalDtype,
 | 
						|
    PandasDtype,
 | 
						|
    PeriodDtype,
 | 
						|
)
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
import pandas._testing as tm
 | 
						|
from pandas.tests.extension.base.base import BaseExtensionTests
 | 
						|
 | 
						|
 | 
						|
class BaseSetitemTests(BaseExtensionTests):
 | 
						|
    @pytest.fixture(
        params=[
            lambda obj: obj.index,
            lambda obj: list(obj.index),
            lambda obj: slice(None),
            lambda obj: slice(0, len(obj)),
            lambda obj: range(len(obj)),
            lambda obj: list(range(len(obj))),
            lambda obj: np.ones(len(obj), dtype=bool),
        ],
        ids=[
            "index",
            "list[index]",
            "null_slice",
            "full_slice",
            "range",
            "list(range)",
            "mask",
        ],
    )
    def full_indexer(self, request):
        """
        Parametrized fixture returning a callable that builds a full-length key.

        Each parameter accepts an object and returns an indexer (its index, a
        list, a slice, a range or an all-True boolean mask) intended to be
        passed to ``obj.loc`` to get or set every element of the object.

        Some of the variants assume that ``obj.index`` is the default
        RangeIndex.
        """
        return request.param
 | 
						|
 | 
						|
    def test_setitem_scalar_series(self, data, box_in_series):
 | 
						|
        if box_in_series:
 | 
						|
            data = pd.Series(data)
 | 
						|
        data[0] = data[1]
 | 
						|
        assert data[0] == data[1]
 | 
						|
 | 
						|
    def test_setitem_sequence(self, data, box_in_series):
 | 
						|
        if box_in_series:
 | 
						|
            data = pd.Series(data)
 | 
						|
        original = data.copy()
 | 
						|
 | 
						|
        data[[0, 1]] = [data[1], data[0]]
 | 
						|
        assert data[0] == original[1]
 | 
						|
        assert data[1] == original[0]
 | 
						|
 | 
						|
    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        """
        A one-element value set through a longer list or slice indexer raises.

        The value is either a plain list or, when ``as_array`` is true, an
        array built with ``data._from_sequence``. After each failed assignment
        the Series must still equal its original contents.
        """
        ser = pd.Series(data)
        snapshot = ser.copy()
        too_short = [data[0]]
        if as_array:
            too_short = data._from_sequence(too_short)

        template = "cannot set using a {} indexer with a different length"
        for kind, key in (("list-like", [0, 1]), ("slice", slice(3))):
            with pytest.raises(ValueError, match=template.format(kind)):
                ser[key] = too_short
            # Ensure no modifications made before the exception
            self.assert_series_equal(ser, snapshot)
 | 
						|
 | 
						|
    def test_setitem_empty_indexer(self, data, box_in_series):
        """Setting an empty list through an empty integer indexer changes nothing."""
        obj = pd.Series(data) if box_in_series else data
        untouched = obj.copy()
        no_positions = np.array([], dtype=int)
        obj[no_positions] = []
        self.assert_equal(obj, untouched)
 | 
						|
 | 
						|
    def test_setitem_sequence_broadcasts(self, data, box_in_series):
 | 
						|
        if box_in_series:
 | 
						|
            data = pd.Series(data)
 | 
						|
        data[[0, 1]] = data[2]
 | 
						|
        assert data[0] == data[2]
 | 
						|
        assert data[1] == data[2]
 | 
						|
 | 
						|
    @pytest.mark.parametrize("setter", ["loc", "iloc"])
 | 
						|
    def test_setitem_scalar(self, data, setter):
 | 
						|
        arr = pd.Series(data)
 | 
						|
        setter = getattr(arr, setter)
 | 
						|
        setter[0] = data[1]
 | 
						|
        assert arr[0] == data[1]
 | 
						|
 | 
						|
    def test_setitem_loc_scalar_mixed(self, data):
 | 
						|
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
 | 
						|
        df.loc[0, "B"] = data[1]
 | 
						|
        assert df.loc[0, "B"] == data[1]
 | 
						|
 | 
						|
    def test_setitem_loc_scalar_single(self, data):
 | 
						|
        df = pd.DataFrame({"B": data})
 | 
						|
        df.loc[10, "B"] = data[1]
 | 
						|
        assert df.loc[10, "B"] == data[1]
 | 
						|
 | 
						|
    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
 | 
						|
        df = pd.DataFrame({"A": data, "B": data})
 | 
						|
        df.loc[10, "B"] = data[1]
 | 
						|
        assert df.loc[10, "B"] == data[1]
 | 
						|
 | 
						|
    def test_setitem_iloc_scalar_mixed(self, data):
 | 
						|
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
 | 
						|
        df.iloc[0, 1] = data[1]
 | 
						|
        assert df.loc[0, "B"] == data[1]
 | 
						|
 | 
						|
    def test_setitem_iloc_scalar_single(self, data):
 | 
						|
        df = pd.DataFrame({"B": data})
 | 
						|
        df.iloc[10, 0] = data[1]
 | 
						|
        assert df.loc[10, "B"] == data[1]
 | 
						|
 | 
						|
    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
 | 
						|
        df = pd.DataFrame({"A": data, "B": data})
 | 
						|
        df.iloc[10, 1] = data[1]
 | 
						|
        assert df.loc[10, "B"] == data[1]
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array", "boolean-array-na"],
    )
    def test_setitem_mask(self, data, mask, box_in_series):
        """
        Set a scalar through a five-element boolean mask.

        The first three positions are True in every mask; the expected result
        has ``data[0]`` there and keeps positions 3 and 4 (False or NA in the
        mask) unchanged.
        """
        head = data[:5].copy()
        want = head.take([0, 0, 0, 3, 4])
        if box_in_series:
            head, want = pd.Series(head), pd.Series(want)
        head[mask] = data[0]
        self.assert_equal(want, head)
 | 
						|
 | 
						|
    def test_setitem_mask_raises(self, data, box_in_series):
 | 
						|
        # wrong length
 | 
						|
        mask = np.array([True, False])
 | 
						|
 | 
						|
        if box_in_series:
 | 
						|
            data = pd.Series(data)
 | 
						|
 | 
						|
        with pytest.raises(IndexError, match="wrong length"):
 | 
						|
            data[mask] = data[0]
 | 
						|
 | 
						|
        mask = pd.array(mask, dtype="boolean")
 | 
						|
        with pytest.raises(IndexError, match="wrong length"):
 | 
						|
            data[mask] = data[0]
 | 
						|
 | 
						|
    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
 | 
						|
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
 | 
						|
        mask[:3] = True
 | 
						|
        mask[3:5] = pd.NA
 | 
						|
 | 
						|
        if box_in_series:
 | 
						|
            data = pd.Series(data)
 | 
						|
 | 
						|
        data[mask] = data[0]
 | 
						|
 | 
						|
        assert (data[:3] == data[0]).all()
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        """Set the first element into positions 0-2 through a list or integer array."""
        head = data[:5].copy()
        want = data.take([0, 0, 0, 3, 4])
        if box_in_series:
            head, want = pd.Series(head), pd.Series(want)

        head[idx] = head[0]
        self.assert_equal(head, want)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "idx, box_in_series",
 | 
						|
        [
 | 
						|
            ([0, 1, 2, pd.NA], False),
 | 
						|
            pytest.param(
 | 
						|
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
 | 
						|
            ),
 | 
						|
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
 | 
						|
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
 | 
						|
        ],
 | 
						|
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
 | 
						|
    )
 | 
						|
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
 | 
						|
        arr = data.copy()
 | 
						|
 | 
						|
        # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
 | 
						|
        # for list of labels with Series
 | 
						|
        if box_in_series:
 | 
						|
            arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
 | 
						|
 | 
						|
        msg = "Cannot index with an integer indexer containing NA values"
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            arr[idx] = arr[0]
 | 
						|
 | 
						|
    @pytest.mark.parametrize("as_callable", [True, False])
 | 
						|
    @pytest.mark.parametrize("setter", ["loc", None])
 | 
						|
    def test_setitem_mask_aligned(self, data, as_callable, setter):
 | 
						|
        ser = pd.Series(data)
 | 
						|
        mask = np.zeros(len(data), dtype=bool)
 | 
						|
        mask[:2] = True
 | 
						|
 | 
						|
        if as_callable:
 | 
						|
            mask2 = lambda x: mask
 | 
						|
        else:
 | 
						|
            mask2 = mask
 | 
						|
 | 
						|
        if setter:
 | 
						|
            # loc
 | 
						|
            target = getattr(ser, setter)
 | 
						|
        else:
 | 
						|
            # Series.__setitem__
 | 
						|
            target = ser
 | 
						|
 | 
						|
        target[mask2] = data[5:7]
 | 
						|
 | 
						|
        ser[mask2] = data[5:7]
 | 
						|
        assert ser[0] == data[5]
 | 
						|
        assert ser[1] == data[6]
 | 
						|
 | 
						|
    @pytest.mark.parametrize("setter", ["loc", None])
 | 
						|
    def test_setitem_mask_broadcast(self, data, setter):
 | 
						|
        ser = pd.Series(data)
 | 
						|
        mask = np.zeros(len(data), dtype=bool)
 | 
						|
        mask[:2] = True
 | 
						|
 | 
						|
        if setter:  # loc
 | 
						|
            target = getattr(ser, setter)
 | 
						|
        else:  # __setitem__
 | 
						|
            target = ser
 | 
						|
 | 
						|
        target[mask] = data[10]
 | 
						|
        assert ser[0] == data[10]
 | 
						|
        assert ser[1] == data[10]
 | 
						|
 | 
						|
    def test_setitem_expand_columns(self, data):
        """
        Add an integer column "B" to a frame holding ``data``, then replace it.

        The column is added once with ``df["B"] = 1`` and once with
        ``df.loc[:, "B"] = 1``; finally "B" is overwritten with ``data``.
        """
        base = pd.DataFrame({"A": data})
        ones = [1] * len(data)

        result = base.copy()
        result["B"] = 1
        expected = pd.DataFrame({"A": data, "B": ones})
        self.assert_frame_equal(result, expected)

        result = base.copy()
        result.loc[:, "B"] = 1
        self.assert_frame_equal(result, expected)

        # overwrite with new type
        result["B"] = data
        expected = pd.DataFrame({"A": data, "B": data})
        self.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_setitem_expand_with_extension(self, data):
        """Add ``data`` as new column "B" via ``df["B"]`` and via ``df.loc[:, "B"]``."""
        ones = [1] * len(data)
        base = pd.DataFrame({"A": ones})
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})

        via_getitem = base.copy()
        via_getitem["B"] = data
        self.assert_frame_equal(via_getitem, expected)

        via_loc = base.copy()
        via_loc.loc[:, "B"] = data
        self.assert_frame_equal(via_loc, expected)
 | 
						|
 | 
						|
    def test_setitem_frame_invalid_length(self, data):
 | 
						|
        df = pd.DataFrame({"A": [1] * len(data)})
 | 
						|
        xpr = (
 | 
						|
            rf"Length of values \({len(data[:5])}\) "
 | 
						|
            rf"does not match length of index \({len(df)}\)"
 | 
						|
        )
 | 
						|
        with pytest.raises(ValueError, match=xpr):
 | 
						|
            df["B"] = data[:5]
 | 
						|
 | 
						|
    def test_setitem_tuple_index(self, data):
        """Setting with the tuple key ``(0, 0)`` updates the row labelled by that tuple."""
        tuple_labels = [(0, 0), (0, 1)]
        ser = pd.Series(data[:2], index=tuple_labels)
        expected = pd.Series(data.take([1, 1]), index=ser.index)
        ser[(0, 0)] = data[1]
        self.assert_series_equal(ser, expected)
 | 
						|
 | 
						|
    def test_setitem_slice(self, data, box_in_series):
        """A scalar set through ``[:3]`` replaces the first three of five elements."""
        head = data[:5].copy()
        want = data.take([0, 0, 0, 3, 4])
        if box_in_series:
            head, want = pd.Series(head), pd.Series(want)

        head[:3] = data[0]
        self.assert_equal(head, want)
 | 
						|
 | 
						|
    def test_setitem_loc_iloc_slice(self, data):
        """Slice assignment via ``iloc[:3]`` and ``loc[:"c"]`` gives the same result."""
        labels = ["a", "b", "c", "d", "e"]
        s = pd.Series(data[:5].copy(), index=labels)
        expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)

        by_position = s.copy()
        by_position.iloc[:3] = data[0]
        self.assert_equal(by_position, expected)

        by_label = s.copy()
        by_label.loc[:"c"] = data[0]
        self.assert_equal(by_label, expected)
 | 
						|
 | 
						|
    def test_setitem_slice_mismatch_length_raises(self, data):
 | 
						|
        arr = data[:5]
 | 
						|
        with pytest.raises(ValueError):
 | 
						|
            arr[:1] = arr[:2]
 | 
						|
 | 
						|
    def test_setitem_slice_array(self, data):
        """Overwrite a copy of the first five elements with the last five."""
        tail = data[-5:]
        arr = data[:5].copy()
        arr[:5] = tail
        self.assert_extension_array_equal(arr, data[-5:])
 | 
						|
 | 
						|
    def test_setitem_scalar_key_sequence_raise(self, data):
 | 
						|
        arr = data[:5].copy()
 | 
						|
        with pytest.raises(ValueError):
 | 
						|
            arr[0] = arr[[0, 1]]
 | 
						|
 | 
						|
    def test_setitem_preserves_views(self, data):
 | 
						|
        # GH#28150 setitem shouldn't swap the underlying data
 | 
						|
        view1 = data.view()
 | 
						|
        view2 = data[:]
 | 
						|
 | 
						|
        data[0] = data[1]
 | 
						|
        assert view1[0] == data[1]
 | 
						|
        assert view2[0] == data[1]
 | 
						|
 | 
						|
    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        """Create column "data" on an empty frame via ``.loc[key, "data"]``."""
        # https://github.com/pandas-dev/pandas/issues/32395
        expected = pd.DataFrame({"data": pd.Series(data)})
        result = pd.DataFrame(index=expected.index)

        key = full_indexer(expected)
        result.loc[key, "data"] = expected["data"]

        self.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_setitem_with_expansion_row(self, data, na_value):
        """Append rows with ``.loc`` on new labels: first a value, then ``na_value``."""
        frame = pd.DataFrame({"data": data[:1]})

        frame.loc[1, "data"] = data[1]
        two_rows = pd.DataFrame({"data": data[:2]})
        self.assert_frame_equal(frame, two_rows)

        # https://github.com/pandas-dev/pandas/issues/47284
        frame.loc[2, "data"] = na_value
        column = pd.Series([data[0], data[1], na_value], dtype=data.dtype)
        three_rows = pd.DataFrame({"data": column})
        self.assert_frame_equal(frame, three_rows)
 | 
						|
 | 
						|
    def test_setitem_series(self, data, full_indexer):
        """Set a Series of ``data`` into an object-dtype Series via ``.loc[key]``."""
        # https://github.com/pandas-dev/pandas/issues/32395
        source = pd.Series(data, name="data")
        result = pd.Series(index=source.index, dtype=object, name="data")

        # because result has object dtype, the attempt to do setting inplace
        #  is successful, and object dtype is retained
        result.loc[full_indexer(source)] = source

        as_objects = data.astype(object)
        expected = pd.Series(
            as_objects, index=source.index, name="data", dtype=object
        )
        self.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    def test_setitem_frame_2d_values(self, data, request):
        """
        Round-trip a one-column frame through ``iloc`` setitem with 2D values.

        The frame (or its ``.values``) is assigned back onto itself, in full
        and without the last row, and must equal the original copy each time.
        """
        # GH#44514
        df = pd.DataFrame({"A": data})

        # Avoiding using_array_manager fixture
        #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
        # These dtypes have non-broken implementations of _can_hold_element
        unaffected = (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
        if using_array_manager and not isinstance(data.dtype, unaffected):
            mark = pytest.mark.xfail(reason="Goes through split path, loses dtype")
            request.node.add_marker(mark)

        df = pd.DataFrame({"A": data})
        orig = df.copy()

        # (row key, function producing the value to assign from the frame)
        roundtrips = [
            (slice(None), lambda frame: frame),
            (slice(None, -1), lambda frame: frame.iloc[:-1]),
            (slice(None), lambda frame: frame.values),
            (slice(None, -1), lambda frame: frame.values[:-1]),
        ]
        for rows, pick in roundtrips:
            df.iloc[rows] = pick(df)
            self.assert_frame_equal(df, orig)
 | 
						|
 | 
						|
    def test_delitem_series(self, data):
        """``del ser[1]`` leaves the same Series as selecting every position but 1."""
        # GH#40763
        ser = pd.Series(data, name="data")

        keep = np.delete(np.arange(len(ser)), 1)

        expected = ser[keep]
        del ser[1]
        self.assert_series_equal(ser, expected)
 | 
						|
 | 
						|
    def test_setitem_invalid(self, data, invalid_scalar):
 | 
						|
        msg = ""  # messages vary by subclass, so we do not test it
 | 
						|
        with pytest.raises((ValueError, TypeError), match=msg):
 | 
						|
            data[0] = invalid_scalar
 | 
						|
 | 
						|
        with pytest.raises((ValueError, TypeError), match=msg):
 | 
						|
            data[:] = invalid_scalar
 |