324 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			324 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
import pandas._testing as tm
 | 
						|
from pandas.arrays import BooleanArray
 | 
						|
from pandas.core.arrays.boolean import coerce_to_array
 | 
						|
 | 
						|
 | 
						|
def test_boolean_array_constructor():
 | 
						|
    values = np.array([True, False, True, False], dtype="bool")
 | 
						|
    mask = np.array([False, False, False, True], dtype="bool")
 | 
						|
 | 
						|
    result = BooleanArray(values, mask)
 | 
						|
    expected = pd.array([True, False, True, None], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
    with pytest.raises(TypeError, match="values should be boolean numpy array"):
 | 
						|
        BooleanArray(values.tolist(), mask)
 | 
						|
 | 
						|
    with pytest.raises(TypeError, match="mask should be boolean numpy array"):
 | 
						|
        BooleanArray(values, mask.tolist())
 | 
						|
 | 
						|
    with pytest.raises(TypeError, match="values should be boolean numpy array"):
 | 
						|
        BooleanArray(values.astype(int), mask)
 | 
						|
 | 
						|
    with pytest.raises(TypeError, match="mask should be boolean numpy array"):
 | 
						|
        BooleanArray(values, None)
 | 
						|
 | 
						|
    with pytest.raises(ValueError, match="values.shape must match mask.shape"):
 | 
						|
        BooleanArray(values.reshape(1, -1), mask)
 | 
						|
 | 
						|
    with pytest.raises(ValueError, match="values.shape must match mask.shape"):
 | 
						|
        BooleanArray(values, mask.reshape(1, -1))
 | 
						|
 | 
						|
 | 
						|
def test_boolean_array_constructor_copy():
 | 
						|
    values = np.array([True, False, True, False], dtype="bool")
 | 
						|
    mask = np.array([False, False, False, True], dtype="bool")
 | 
						|
 | 
						|
    result = BooleanArray(values, mask)
 | 
						|
    assert result._data is values
 | 
						|
    assert result._mask is mask
 | 
						|
 | 
						|
    result = BooleanArray(values, mask, copy=True)
 | 
						|
    assert result._data is not values
 | 
						|
    assert result._mask is not mask
 | 
						|
 | 
						|
 | 
						|
def test_to_boolean_array():
 | 
						|
    expected = BooleanArray(
 | 
						|
        np.array([True, False, True]), np.array([False, False, False])
 | 
						|
    )
 | 
						|
 | 
						|
    result = pd.array([True, False, True], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    result = pd.array(np.array([True, False, True]), dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
    # with missing values
 | 
						|
    expected = BooleanArray(
 | 
						|
        np.array([True, False, True]), np.array([False, False, True])
 | 
						|
    )
 | 
						|
 | 
						|
    result = pd.array([True, False, None], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_to_boolean_array_all_none():
 | 
						|
    expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))
 | 
						|
 | 
						|
    result = pd.array([None, None, None], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "a, b",
 | 
						|
    [
 | 
						|
        ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
 | 
						|
        ([True, np.nan], [True, None]),
 | 
						|
        ([True, pd.NA], [True, None]),
 | 
						|
        ([np.nan, np.nan], [None, None]),
 | 
						|
        (np.array([np.nan, np.nan], dtype=float), [None, None]),
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_to_boolean_array_missing_indicators(a, b):
 | 
						|
    result = pd.array(a, dtype="boolean")
 | 
						|
    expected = pd.array(b, dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "values",
 | 
						|
    [
 | 
						|
        ["foo", "bar"],
 | 
						|
        ["1", "2"],
 | 
						|
        # "foo",
 | 
						|
        [1, 2],
 | 
						|
        [1.0, 2.0],
 | 
						|
        pd.date_range("20130101", periods=2),
 | 
						|
        np.array(["foo"]),
 | 
						|
        np.array([1, 2]),
 | 
						|
        np.array([1.0, 2.0]),
 | 
						|
        [np.nan, {"a": 1}],
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_to_boolean_array_error(values):
 | 
						|
    # error in converting existing arrays to BooleanArray
 | 
						|
    msg = "Need to pass bool-like value"
 | 
						|
    with pytest.raises(TypeError, match=msg):
 | 
						|
        pd.array(values, dtype="boolean")
 | 
						|
 | 
						|
 | 
						|
def test_to_boolean_array_from_integer_array():
 | 
						|
    result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
 | 
						|
    expected = pd.array([True, False, True, False], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
    # with missing values
 | 
						|
    result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
 | 
						|
    expected = pd.array([True, False, True, None], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_to_boolean_array_from_float_array():
 | 
						|
    result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
 | 
						|
    expected = pd.array([True, False, True, False], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
    # with missing values
 | 
						|
    result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
 | 
						|
    expected = pd.array([True, False, True, None], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_to_boolean_array_integer_like():
 | 
						|
    # integers of 0's and 1's
 | 
						|
    result = pd.array([1, 0, 1, 0], dtype="boolean")
 | 
						|
    expected = pd.array([True, False, True, False], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
    # with missing values
 | 
						|
    result = pd.array([1, 0, 1, None], dtype="boolean")
 | 
						|
    expected = pd.array([True, False, True, None], dtype="boolean")
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_coerce_to_array():
 | 
						|
    # TODO this is currently not public API
 | 
						|
    values = np.array([True, False, True, False], dtype="bool")
 | 
						|
    mask = np.array([False, False, False, True], dtype="bool")
 | 
						|
    result = BooleanArray(*coerce_to_array(values, mask=mask))
 | 
						|
    expected = BooleanArray(values, mask)
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    assert result._data is values
 | 
						|
    assert result._mask is mask
 | 
						|
    result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
 | 
						|
    expected = BooleanArray(values, mask)
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    assert result._data is not values
 | 
						|
    assert result._mask is not mask
 | 
						|
 | 
						|
    # mixed missing from values and mask
 | 
						|
    values = [True, False, None, False]
 | 
						|
    mask = np.array([False, False, False, True], dtype="bool")
 | 
						|
    result = BooleanArray(*coerce_to_array(values, mask=mask))
 | 
						|
    expected = BooleanArray(
 | 
						|
        np.array([True, False, True, True]), np.array([False, False, True, True])
 | 
						|
    )
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
    result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
    # raise errors for wrong dimension
 | 
						|
    values = np.array([True, False, True, False], dtype="bool")
 | 
						|
    mask = np.array([False, False, False, True], dtype="bool")
 | 
						|
 | 
						|
    with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
 | 
						|
        coerce_to_array(values.reshape(1, -1))
 | 
						|
 | 
						|
    with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
 | 
						|
        coerce_to_array(values, mask=mask.reshape(1, -1))
 | 
						|
 | 
						|
 | 
						|
def test_coerce_to_array_from_boolean_array():
 | 
						|
    # passing BooleanArray to coerce_to_array
 | 
						|
    values = np.array([True, False, True, False], dtype="bool")
 | 
						|
    mask = np.array([False, False, False, True], dtype="bool")
 | 
						|
    arr = BooleanArray(values, mask)
 | 
						|
    result = BooleanArray(*coerce_to_array(arr))
 | 
						|
    tm.assert_extension_array_equal(result, arr)
 | 
						|
    # no copy
 | 
						|
    assert result._data is arr._data
 | 
						|
    assert result._mask is arr._mask
 | 
						|
 | 
						|
    result = BooleanArray(*coerce_to_array(arr), copy=True)
 | 
						|
    tm.assert_extension_array_equal(result, arr)
 | 
						|
    assert result._data is not arr._data
 | 
						|
    assert result._mask is not arr._mask
 | 
						|
 | 
						|
    with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
 | 
						|
        coerce_to_array(arr, mask=mask)
 | 
						|
 | 
						|
 | 
						|
def test_coerce_to_numpy_array():
 | 
						|
    # with missing values -> object dtype
 | 
						|
    arr = pd.array([True, False, None], dtype="boolean")
 | 
						|
    result = np.array(arr)
 | 
						|
    expected = np.array([True, False, pd.NA], dtype="object")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    # also with no missing values -> object dtype
 | 
						|
    arr = pd.array([True, False, True], dtype="boolean")
 | 
						|
    result = np.array(arr)
 | 
						|
    expected = np.array([True, False, True], dtype="object")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    # force bool dtype
 | 
						|
    result = np.array(arr, dtype="bool")
 | 
						|
    expected = np.array([True, False, True], dtype="bool")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
    # with missing values will raise error
 | 
						|
    arr = pd.array([True, False, None], dtype="boolean")
 | 
						|
    msg = (
 | 
						|
        "cannot convert to 'bool'-dtype NumPy array with missing values. "
 | 
						|
        "Specify an appropriate 'na_value' for this dtype."
 | 
						|
    )
 | 
						|
    with pytest.raises(ValueError, match=msg):
 | 
						|
        np.array(arr, dtype="bool")
 | 
						|
 | 
						|
 | 
						|
def test_to_boolean_array_from_strings():
 | 
						|
    result = BooleanArray._from_sequence_of_strings(
 | 
						|
        np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object)
 | 
						|
    )
 | 
						|
    expected = BooleanArray(
 | 
						|
        np.array([True, False, True, True, False, False, False]),
 | 
						|
        np.array([False, False, False, False, False, False, True]),
 | 
						|
    )
 | 
						|
 | 
						|
    tm.assert_extension_array_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_to_boolean_array_from_strings_invalid_string():
 | 
						|
    with pytest.raises(ValueError, match="cannot be cast"):
 | 
						|
        BooleanArray._from_sequence_of_strings(["donkey"])
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
 | 
						|
def test_to_numpy(box):
 | 
						|
    con = pd.Series if box else pd.array
 | 
						|
    # default (with or without missing values) -> object dtype
 | 
						|
    arr = con([True, False, True], dtype="boolean")
 | 
						|
    result = arr.to_numpy()
 | 
						|
    expected = np.array([True, False, True], dtype="object")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    arr = con([True, False, None], dtype="boolean")
 | 
						|
    result = arr.to_numpy()
 | 
						|
    expected = np.array([True, False, pd.NA], dtype="object")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    arr = con([True, False, None], dtype="boolean")
 | 
						|
    result = arr.to_numpy(dtype="str")
 | 
						|
    expected = np.array([True, False, pd.NA], dtype="<U5")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    # no missing values -> can convert to bool, otherwise raises
 | 
						|
    arr = con([True, False, True], dtype="boolean")
 | 
						|
    result = arr.to_numpy(dtype="bool")
 | 
						|
    expected = np.array([True, False, True], dtype="bool")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    arr = con([True, False, None], dtype="boolean")
 | 
						|
    with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
 | 
						|
        result = arr.to_numpy(dtype="bool")
 | 
						|
 | 
						|
    # specify dtype and na_value
 | 
						|
    arr = con([True, False, None], dtype="boolean")
 | 
						|
    result = arr.to_numpy(dtype=object, na_value=None)
 | 
						|
    expected = np.array([True, False, None], dtype="object")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    result = arr.to_numpy(dtype=bool, na_value=False)
 | 
						|
    expected = np.array([True, False, False], dtype="bool")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    result = arr.to_numpy(dtype="int64", na_value=-99)
 | 
						|
    expected = np.array([1, 0, -99], dtype="int64")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    result = arr.to_numpy(dtype="float64", na_value=np.nan)
 | 
						|
    expected = np.array([1, 0, np.nan], dtype="float64")
 | 
						|
    tm.assert_numpy_array_equal(result, expected)
 | 
						|
 | 
						|
    # converting to int or float without specifying na_value raises
 | 
						|
    with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
 | 
						|
        arr.to_numpy(dtype="int64")
 | 
						|
    with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
 | 
						|
        arr.to_numpy(dtype="float64")
 | 
						|
 | 
						|
 | 
						|
def test_to_numpy_copy():
 | 
						|
    # to_numpy can be zero-copy if no missing values
 | 
						|
    arr = pd.array([True, False, True], dtype="boolean")
 | 
						|
    result = arr.to_numpy(dtype=bool)
 | 
						|
    result[0] = False
 | 
						|
    tm.assert_extension_array_equal(
 | 
						|
        arr, pd.array([False, False, True], dtype="boolean")
 | 
						|
    )
 | 
						|
 | 
						|
    arr = pd.array([True, False, True], dtype="boolean")
 | 
						|
    result = arr.to_numpy(dtype=bool, copy=True)
 | 
						|
    result[0] = False
 | 
						|
    tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))
 |