针对pulse-transit的工具
This commit is contained in:
		
							
								
								
									
										791
									
								
								dist/client/pandas/tests/tools/test_to_numeric.py
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										791
									
								
								dist/client/pandas/tests/tools/test_to_numeric.py
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,791 @@
 | 
			
		||||
import decimal
 | 
			
		||||
 | 
			
		||||
import numpy as np
 | 
			
		||||
from numpy import iinfo
 | 
			
		||||
import pytest
 | 
			
		||||
 | 
			
		||||
from pandas.compat import is_platform_arm
 | 
			
		||||
 | 
			
		||||
import pandas as pd
 | 
			
		||||
from pandas import (
 | 
			
		||||
    DataFrame,
 | 
			
		||||
    Index,
 | 
			
		||||
    Series,
 | 
			
		||||
    to_numeric,
 | 
			
		||||
)
 | 
			
		||||
import pandas._testing as tm
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(params=[None, "ignore", "raise", "coerce"])
 | 
			
		||||
def errors(request):
 | 
			
		||||
    return request.param
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(params=[True, False])
 | 
			
		||||
def signed(request):
 | 
			
		||||
    return request.param
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(params=[lambda x: x, str], ids=["identity", "str"])
 | 
			
		||||
def transform(request):
 | 
			
		||||
    return request.param
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(params=[47393996303418497800, 100000000000000000000])
 | 
			
		||||
def large_val(request):
 | 
			
		||||
    return request.param
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(params=[True, False])
 | 
			
		||||
def multiple_elts(request):
 | 
			
		||||
    return request.param
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.fixture(
 | 
			
		||||
    params=[
 | 
			
		||||
        (lambda x: Index(x, name="idx"), tm.assert_index_equal),
 | 
			
		||||
        (lambda x: Series(x, name="ser"), tm.assert_series_equal),
 | 
			
		||||
        (lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal),
 | 
			
		||||
    ]
 | 
			
		||||
)
 | 
			
		||||
def transform_assert_equal(request):
 | 
			
		||||
    return request.param
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "input_kwargs,result_kwargs",
 | 
			
		||||
    [
 | 
			
		||||
        ({}, {"dtype": np.int64}),
 | 
			
		||||
        ({"errors": "coerce", "downcast": "integer"}, {"dtype": np.int8}),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_empty(input_kwargs, result_kwargs):
 | 
			
		||||
    # see gh-16302
 | 
			
		||||
    ser = Series([], dtype=object)
 | 
			
		||||
    result = to_numeric(ser, **input_kwargs)
 | 
			
		||||
 | 
			
		||||
    expected = Series([], **result_kwargs)
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("last_val", ["7", 7])
 | 
			
		||||
def test_series(last_val):
 | 
			
		||||
    ser = Series(["1", "-3.14", last_val])
 | 
			
		||||
    result = to_numeric(ser)
 | 
			
		||||
 | 
			
		||||
    expected = Series([1, -3.14, 7])
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data",
 | 
			
		||||
    [
 | 
			
		||||
        [1, 3, 4, 5],
 | 
			
		||||
        [1.0, 3.0, 4.0, 5.0],
 | 
			
		||||
        # Bool is regarded as numeric.
 | 
			
		||||
        [True, False, True, True],
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_series_numeric(data):
 | 
			
		||||
    ser = Series(data, index=list("ABCD"), name="EFG")
 | 
			
		||||
 | 
			
		||||
    result = to_numeric(ser)
 | 
			
		||||
    tm.assert_series_equal(result, ser)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data,msg",
 | 
			
		||||
    [
 | 
			
		||||
        ([1, -3.14, "apple"], 'Unable to parse string "apple" at position 2'),
 | 
			
		||||
        (
 | 
			
		||||
            ["orange", 1, -3.14, "apple"],
 | 
			
		||||
            'Unable to parse string "orange" at position 0',
 | 
			
		||||
        ),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_error(data, msg):
 | 
			
		||||
    ser = Series(data)
 | 
			
		||||
 | 
			
		||||
    with pytest.raises(ValueError, match=msg):
 | 
			
		||||
        to_numeric(ser, errors="raise")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])]
 | 
			
		||||
)
 | 
			
		||||
def test_ignore_error(errors, exp_data):
 | 
			
		||||
    ser = Series([1, -3.14, "apple"])
 | 
			
		||||
    result = to_numeric(ser, errors=errors)
 | 
			
		||||
 | 
			
		||||
    expected = Series(exp_data)
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "errors,exp",
 | 
			
		||||
    [
 | 
			
		||||
        ("raise", 'Unable to parse string "apple" at position 2'),
 | 
			
		||||
        ("ignore", [True, False, "apple"]),
 | 
			
		||||
        # Coerces to float.
 | 
			
		||||
        ("coerce", [1.0, 0.0, np.nan]),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_bool_handling(errors, exp):
 | 
			
		||||
    ser = Series([True, False, "apple"])
 | 
			
		||||
 | 
			
		||||
    if isinstance(exp, str):
 | 
			
		||||
        with pytest.raises(ValueError, match=exp):
 | 
			
		||||
            to_numeric(ser, errors=errors)
 | 
			
		||||
    else:
 | 
			
		||||
        result = to_numeric(ser, errors=errors)
 | 
			
		||||
        expected = Series(exp)
 | 
			
		||||
 | 
			
		||||
        tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_list():
 | 
			
		||||
    ser = ["1", "-3.14", "7"]
 | 
			
		||||
    res = to_numeric(ser)
 | 
			
		||||
 | 
			
		||||
    expected = np.array([1, -3.14, 7])
 | 
			
		||||
    tm.assert_numpy_array_equal(res, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data,arr_kwargs",
 | 
			
		||||
    [
 | 
			
		||||
        ([1, 3, 4, 5], {"dtype": np.int64}),
 | 
			
		||||
        ([1.0, 3.0, 4.0, 5.0], {}),
 | 
			
		||||
        # Boolean is regarded as numeric.
 | 
			
		||||
        ([True, False, True, True], {}),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_list_numeric(data, arr_kwargs):
 | 
			
		||||
    result = to_numeric(data)
 | 
			
		||||
    expected = np.array(data, **arr_kwargs)
 | 
			
		||||
    tm.assert_numpy_array_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("kwargs", [{"dtype": "O"}, {}])
 | 
			
		||||
def test_numeric(kwargs):
 | 
			
		||||
    data = [1, -3.14, 7]
 | 
			
		||||
 | 
			
		||||
    ser = Series(data, **kwargs)
 | 
			
		||||
    result = to_numeric(ser)
 | 
			
		||||
 | 
			
		||||
    expected = Series(data)
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "columns",
 | 
			
		||||
    [
 | 
			
		||||
        # One column.
 | 
			
		||||
        "a",
 | 
			
		||||
        # Multiple columns.
 | 
			
		||||
        ["a", "b"],
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_numeric_df_columns(columns):
 | 
			
		||||
    # see gh-14827
 | 
			
		||||
    df = DataFrame(
 | 
			
		||||
        {
 | 
			
		||||
            "a": [1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), "0.1"],
 | 
			
		||||
            "b": [1.0, 2.0, 3.0, 4.0],
 | 
			
		||||
        }
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    expected = DataFrame({"a": [1.2, 3.14, np.inf, 0.1], "b": [1.0, 2.0, 3.0, 4.0]})
 | 
			
		||||
 | 
			
		||||
    df_copy = df.copy()
 | 
			
		||||
    df_copy[columns] = df_copy[columns].apply(to_numeric)
 | 
			
		||||
 | 
			
		||||
    tm.assert_frame_equal(df_copy, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data,exp_data",
 | 
			
		||||
    [
 | 
			
		||||
        (
 | 
			
		||||
            [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
 | 
			
		||||
            [[3.14, 1.0], 1.6, 0.1],
 | 
			
		||||
        ),
 | 
			
		||||
        ([np.array([decimal.Decimal(3.14), 1.0]), 0.1], [[3.14, 1.0], 0.1]),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_numeric_embedded_arr_likes(data, exp_data):
 | 
			
		||||
    # Test to_numeric with embedded lists and arrays
 | 
			
		||||
    df = DataFrame({"a": data})
 | 
			
		||||
    df["a"] = df["a"].apply(to_numeric)
 | 
			
		||||
 | 
			
		||||
    expected = DataFrame({"a": exp_data})
 | 
			
		||||
    tm.assert_frame_equal(df, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_all_nan():
 | 
			
		||||
    ser = Series(["a", "b", "c"])
 | 
			
		||||
    result = to_numeric(ser, errors="coerce")
 | 
			
		||||
 | 
			
		||||
    expected = Series([np.nan, np.nan, np.nan])
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_type_check(errors):
 | 
			
		||||
    # see gh-11776
 | 
			
		||||
    df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
 | 
			
		||||
    kwargs = {"errors": errors} if errors is not None else {}
 | 
			
		||||
    with pytest.raises(TypeError, match="1-d array"):
 | 
			
		||||
        to_numeric(df, **kwargs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("val", [1, 1.1, 20001])
 | 
			
		||||
def test_scalar(val, signed, transform):
 | 
			
		||||
    val = -val if signed else val
 | 
			
		||||
    assert to_numeric(transform(val)) == float(val)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_really_large_scalar(large_val, signed, transform, errors):
 | 
			
		||||
    # see gh-24910
 | 
			
		||||
    kwargs = {"errors": errors} if errors is not None else {}
 | 
			
		||||
    val = -large_val if signed else large_val
 | 
			
		||||
 | 
			
		||||
    val = transform(val)
 | 
			
		||||
    val_is_string = isinstance(val, str)
 | 
			
		||||
 | 
			
		||||
    if val_is_string and errors in (None, "raise"):
 | 
			
		||||
        msg = "Integer out of range. at position 0"
 | 
			
		||||
        with pytest.raises(ValueError, match=msg):
 | 
			
		||||
            to_numeric(val, **kwargs)
 | 
			
		||||
    else:
 | 
			
		||||
        expected = float(val) if (errors == "coerce" and val_is_string) else val
 | 
			
		||||
        tm.assert_almost_equal(to_numeric(val, **kwargs), expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors):
 | 
			
		||||
    # see gh-24910
 | 
			
		||||
    kwargs = {"errors": errors} if errors is not None else {}
 | 
			
		||||
    val = -large_val if signed else large_val
 | 
			
		||||
    val = transform(val)
 | 
			
		||||
 | 
			
		||||
    extra_elt = "string"
 | 
			
		||||
    arr = [val] + multiple_elts * [extra_elt]
 | 
			
		||||
 | 
			
		||||
    val_is_string = isinstance(val, str)
 | 
			
		||||
    coercing = errors == "coerce"
 | 
			
		||||
 | 
			
		||||
    if errors in (None, "raise") and (val_is_string or multiple_elts):
 | 
			
		||||
        if val_is_string:
 | 
			
		||||
            msg = "Integer out of range. at position 0"
 | 
			
		||||
        else:
 | 
			
		||||
            msg = 'Unable to parse string "string" at position 1'
 | 
			
		||||
 | 
			
		||||
        with pytest.raises(ValueError, match=msg):
 | 
			
		||||
            to_numeric(arr, **kwargs)
 | 
			
		||||
    else:
 | 
			
		||||
        result = to_numeric(arr, **kwargs)
 | 
			
		||||
 | 
			
		||||
        exp_val = float(val) if (coercing and val_is_string) else val
 | 
			
		||||
        expected = [exp_val]
 | 
			
		||||
 | 
			
		||||
        if multiple_elts:
 | 
			
		||||
            if coercing:
 | 
			
		||||
                expected.append(np.nan)
 | 
			
		||||
                exp_dtype = float
 | 
			
		||||
            else:
 | 
			
		||||
                expected.append(extra_elt)
 | 
			
		||||
                exp_dtype = object
 | 
			
		||||
        else:
 | 
			
		||||
            exp_dtype = float if isinstance(exp_val, (int, float)) else object
 | 
			
		||||
 | 
			
		||||
        tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors):
 | 
			
		||||
    # see gh-24910
 | 
			
		||||
    #
 | 
			
		||||
    # Even if we discover that we have to hold float, does not mean
 | 
			
		||||
    # we should be lenient on subsequent elements that fail to be integer.
 | 
			
		||||
    kwargs = {"errors": errors} if errors is not None else {}
 | 
			
		||||
    arr = [str(-large_val if signed else large_val)]
 | 
			
		||||
 | 
			
		||||
    if multiple_elts:
 | 
			
		||||
        arr.insert(0, large_val)
 | 
			
		||||
 | 
			
		||||
    if errors in (None, "raise"):
 | 
			
		||||
        index = int(multiple_elts)
 | 
			
		||||
        msg = f"Integer out of range. at position {index}"
 | 
			
		||||
 | 
			
		||||
        with pytest.raises(ValueError, match=msg):
 | 
			
		||||
            to_numeric(arr, **kwargs)
 | 
			
		||||
    else:
 | 
			
		||||
        result = to_numeric(arr, **kwargs)
 | 
			
		||||
 | 
			
		||||
        if errors == "coerce":
 | 
			
		||||
            expected = [float(i) for i in arr]
 | 
			
		||||
            exp_dtype = float
 | 
			
		||||
        else:
 | 
			
		||||
            expected = arr
 | 
			
		||||
            exp_dtype = object
 | 
			
		||||
 | 
			
		||||
        tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "errors,checker",
 | 
			
		||||
    [
 | 
			
		||||
        ("raise", 'Unable to parse string "fail" at position 0'),
 | 
			
		||||
        ("ignore", lambda x: x == "fail"),
 | 
			
		||||
        ("coerce", lambda x: np.isnan(x)),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_scalar_fail(errors, checker):
 | 
			
		||||
    scalar = "fail"
 | 
			
		||||
 | 
			
		||||
    if isinstance(checker, str):
 | 
			
		||||
        with pytest.raises(ValueError, match=checker):
 | 
			
		||||
            to_numeric(scalar, errors=errors)
 | 
			
		||||
    else:
 | 
			
		||||
        assert checker(to_numeric(scalar, errors=errors))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, np.nan, 3, np.nan]])
 | 
			
		||||
def test_numeric_dtypes(data, transform_assert_equal):
 | 
			
		||||
    transform, assert_equal = transform_assert_equal
 | 
			
		||||
    data = transform(data)
 | 
			
		||||
 | 
			
		||||
    result = to_numeric(data)
 | 
			
		||||
    assert_equal(result, data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data,exp",
 | 
			
		||||
    [
 | 
			
		||||
        (["1", "2", "3"], np.array([1, 2, 3], dtype="int64")),
 | 
			
		||||
        (["1.5", "2.7", "3.4"], np.array([1.5, 2.7, 3.4])),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_str(data, exp, transform_assert_equal):
 | 
			
		||||
    transform, assert_equal = transform_assert_equal
 | 
			
		||||
    result = to_numeric(transform(data))
 | 
			
		||||
 | 
			
		||||
    expected = transform(exp)
 | 
			
		||||
    assert_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_datetime_like(tz_naive_fixture, transform_assert_equal):
 | 
			
		||||
    transform, assert_equal = transform_assert_equal
 | 
			
		||||
    idx = pd.date_range("20130101", periods=3, tz=tz_naive_fixture)
 | 
			
		||||
 | 
			
		||||
    result = to_numeric(transform(idx))
 | 
			
		||||
    expected = transform(idx.asi8)
 | 
			
		||||
    assert_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_timedelta(transform_assert_equal):
 | 
			
		||||
    transform, assert_equal = transform_assert_equal
 | 
			
		||||
    idx = pd.timedelta_range("1 days", periods=3, freq="D")
 | 
			
		||||
 | 
			
		||||
    result = to_numeric(transform(idx))
 | 
			
		||||
    expected = transform(idx.asi8)
 | 
			
		||||
    assert_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_period(transform_assert_equal):
 | 
			
		||||
    transform, assert_equal = transform_assert_equal
 | 
			
		||||
 | 
			
		||||
    idx = pd.period_range("2011-01", periods=3, freq="M", name="")
 | 
			
		||||
    inp = transform(idx)
 | 
			
		||||
 | 
			
		||||
    if isinstance(inp, Index):
 | 
			
		||||
        result = to_numeric(inp)
 | 
			
		||||
        expected = transform(idx.asi8)
 | 
			
		||||
        assert_equal(result, expected)
 | 
			
		||||
    else:
 | 
			
		||||
        # TODO: PeriodDtype, so support it in to_numeric.
 | 
			
		||||
        pytest.skip("Missing PeriodDtype support in to_numeric")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "errors,expected",
 | 
			
		||||
    [
 | 
			
		||||
        ("raise", "Invalid object type at position 0"),
 | 
			
		||||
        ("ignore", Series([[10.0, 2], 1.0, "apple"])),
 | 
			
		||||
        ("coerce", Series([np.nan, 1.0, np.nan])),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_non_hashable(errors, expected):
 | 
			
		||||
    # see gh-13324
 | 
			
		||||
    ser = Series([[10.0, 2], 1.0, "apple"])
 | 
			
		||||
 | 
			
		||||
    if isinstance(expected, str):
 | 
			
		||||
        with pytest.raises(TypeError, match=expected):
 | 
			
		||||
            to_numeric(ser, errors=errors)
 | 
			
		||||
    else:
 | 
			
		||||
        result = to_numeric(ser, errors=errors)
 | 
			
		||||
        tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_downcast_invalid_cast():
 | 
			
		||||
    # see gh-13352
 | 
			
		||||
    data = ["1", 2, 3]
 | 
			
		||||
    invalid_downcast = "unsigned-integer"
 | 
			
		||||
    msg = "invalid downcasting method provided"
 | 
			
		||||
 | 
			
		||||
    with pytest.raises(ValueError, match=msg):
 | 
			
		||||
        to_numeric(data, downcast=invalid_downcast)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_errors_invalid_value():
 | 
			
		||||
    # see gh-26466
 | 
			
		||||
    data = ["1", 2, 3]
 | 
			
		||||
    invalid_error_value = "invalid"
 | 
			
		||||
    msg = "invalid error value specified"
 | 
			
		||||
 | 
			
		||||
    with pytest.raises(ValueError, match=msg):
 | 
			
		||||
        to_numeric(data, errors=invalid_error_value)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data",
 | 
			
		||||
    [
 | 
			
		||||
        ["1", 2, 3],
 | 
			
		||||
        [1, 2, 3],
 | 
			
		||||
        np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "kwargs,exp_dtype",
 | 
			
		||||
    [
 | 
			
		||||
        # Basic function tests.
 | 
			
		||||
        ({}, np.int64),
 | 
			
		||||
        ({"downcast": None}, np.int64),
 | 
			
		||||
        # Support below np.float32 is rare and far between.
 | 
			
		||||
        ({"downcast": "float"}, np.dtype(np.float32).char),
 | 
			
		||||
        # Basic dtype support.
 | 
			
		||||
        ({"downcast": "unsigned"}, np.dtype(np.typecodes["UnsignedInteger"][0])),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_downcast_basic(data, kwargs, exp_dtype):
 | 
			
		||||
    # see gh-13352
 | 
			
		||||
    result = to_numeric(data, **kwargs)
 | 
			
		||||
    expected = np.array([1, 2, 3], dtype=exp_dtype)
 | 
			
		||||
    tm.assert_numpy_array_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("signed_downcast", ["integer", "signed"])
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data",
 | 
			
		||||
    [
 | 
			
		||||
        ["1", 2, 3],
 | 
			
		||||
        [1, 2, 3],
 | 
			
		||||
        np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_signed_downcast(data, signed_downcast):
 | 
			
		||||
    # see gh-13352
 | 
			
		||||
    smallest_int_dtype = np.dtype(np.typecodes["Integer"][0])
 | 
			
		||||
    expected = np.array([1, 2, 3], dtype=smallest_int_dtype)
 | 
			
		||||
 | 
			
		||||
    res = to_numeric(data, downcast=signed_downcast)
 | 
			
		||||
    tm.assert_numpy_array_equal(res, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_ignore_downcast_invalid_data():
 | 
			
		||||
    # If we can't successfully cast the given
 | 
			
		||||
    # data to a numeric dtype, do not bother
 | 
			
		||||
    # with the downcast parameter.
 | 
			
		||||
    data = ["foo", 2, 3]
 | 
			
		||||
    expected = np.array(data, dtype=object)
 | 
			
		||||
 | 
			
		||||
    res = to_numeric(data, errors="ignore", downcast="unsigned")
 | 
			
		||||
    tm.assert_numpy_array_equal(res, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_ignore_downcast_neg_to_unsigned():
 | 
			
		||||
    # Cannot cast to an unsigned integer
 | 
			
		||||
    # because we have a negative number.
 | 
			
		||||
    data = ["-1", 2, 3]
 | 
			
		||||
    expected = np.array([-1, 2, 3], dtype=np.int64)
 | 
			
		||||
 | 
			
		||||
    res = to_numeric(data, downcast="unsigned")
 | 
			
		||||
    tm.assert_numpy_array_equal(res, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"])
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data,expected",
 | 
			
		||||
    [
 | 
			
		||||
        (["1.1", 2, 3], np.array([1.1, 2, 3], dtype=np.float64)),
 | 
			
		||||
        (
 | 
			
		||||
            [10000.0, 20000, 3000, 40000.36, 50000, 50000.00],
 | 
			
		||||
            np.array(
 | 
			
		||||
                [10000.0, 20000, 3000, 40000.36, 50000, 50000.00], dtype=np.float64
 | 
			
		||||
            ),
 | 
			
		||||
        ),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_ignore_downcast_cannot_convert_float(data, expected, downcast):
 | 
			
		||||
    # Cannot cast to an integer (signed or unsigned)
 | 
			
		||||
    # because we have a float number.
 | 
			
		||||
    res = to_numeric(data, downcast=downcast)
 | 
			
		||||
    tm.assert_numpy_array_equal(res, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "downcast,expected_dtype",
 | 
			
		||||
    [("integer", np.int16), ("signed", np.int16), ("unsigned", np.uint16)],
 | 
			
		||||
)
 | 
			
		||||
def test_downcast_not8bit(downcast, expected_dtype):
 | 
			
		||||
    # the smallest integer dtype need not be np.(u)int8
 | 
			
		||||
    data = ["256", 257, 258]
 | 
			
		||||
 | 
			
		||||
    expected = np.array([256, 257, 258], dtype=expected_dtype)
 | 
			
		||||
    res = to_numeric(data, downcast=downcast)
 | 
			
		||||
    tm.assert_numpy_array_equal(res, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "dtype,downcast,min_max",
 | 
			
		||||
    [
 | 
			
		||||
        ("int8", "integer", [iinfo(np.int8).min, iinfo(np.int8).max]),
 | 
			
		||||
        ("int16", "integer", [iinfo(np.int16).min, iinfo(np.int16).max]),
 | 
			
		||||
        ("int32", "integer", [iinfo(np.int32).min, iinfo(np.int32).max]),
 | 
			
		||||
        ("int64", "integer", [iinfo(np.int64).min, iinfo(np.int64).max]),
 | 
			
		||||
        ("uint8", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max]),
 | 
			
		||||
        ("uint16", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max]),
 | 
			
		||||
        ("uint32", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max]),
 | 
			
		||||
        ("uint64", "unsigned", [iinfo(np.uint64).min, iinfo(np.uint64).max]),
 | 
			
		||||
        ("int16", "integer", [iinfo(np.int8).min, iinfo(np.int8).max + 1]),
 | 
			
		||||
        ("int32", "integer", [iinfo(np.int16).min, iinfo(np.int16).max + 1]),
 | 
			
		||||
        ("int64", "integer", [iinfo(np.int32).min, iinfo(np.int32).max + 1]),
 | 
			
		||||
        ("int16", "integer", [iinfo(np.int8).min - 1, iinfo(np.int16).max]),
 | 
			
		||||
        ("int32", "integer", [iinfo(np.int16).min - 1, iinfo(np.int32).max]),
 | 
			
		||||
        ("int64", "integer", [iinfo(np.int32).min - 1, iinfo(np.int64).max]),
 | 
			
		||||
        ("uint16", "unsigned", [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]),
 | 
			
		||||
        ("uint32", "unsigned", [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]),
 | 
			
		||||
        ("uint64", "unsigned", [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_downcast_limits(dtype, downcast, min_max):
 | 
			
		||||
    # see gh-14404: test the limits of each downcast.
 | 
			
		||||
    series = to_numeric(Series(min_max), downcast=downcast)
 | 
			
		||||
    assert series.dtype == dtype
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "ser,expected",
 | 
			
		||||
    [
 | 
			
		||||
        (
 | 
			
		||||
            Series([0, 9223372036854775808]),
 | 
			
		||||
            Series([0, 9223372036854775808], dtype=np.uint64),
 | 
			
		||||
        )
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_downcast_uint64(ser, expected):
 | 
			
		||||
    # see gh-14422:
 | 
			
		||||
    # BUG: to_numeric doesn't work uint64 numbers
 | 
			
		||||
 | 
			
		||||
    result = to_numeric(ser, downcast="unsigned")
 | 
			
		||||
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data,exp_data",
 | 
			
		||||
    [
 | 
			
		||||
        (
 | 
			
		||||
            [200, 300, "", "NaN", 30000000000000000000],
 | 
			
		||||
            [200, 300, np.nan, np.nan, 30000000000000000000],
 | 
			
		||||
        ),
 | 
			
		||||
        (
 | 
			
		||||
            ["12345678901234567890", "1234567890", "ITEM"],
 | 
			
		||||
            [12345678901234567890, 1234567890, np.nan],
 | 
			
		||||
        ),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_coerce_uint64_conflict(data, exp_data):
 | 
			
		||||
    # see gh-17007 and gh-17125
 | 
			
		||||
    #
 | 
			
		||||
    # Still returns float despite the uint64-nan conflict,
 | 
			
		||||
    # which would normally force the casting to object.
 | 
			
		||||
    result = to_numeric(Series(data), errors="coerce")
 | 
			
		||||
    expected = Series(exp_data, dtype=float)
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "errors,exp",
 | 
			
		||||
    [
 | 
			
		||||
        ("ignore", Series(["12345678901234567890", "1234567890", "ITEM"])),
 | 
			
		||||
        ("raise", "Unable to parse string"),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_non_coerce_uint64_conflict(errors, exp):
 | 
			
		||||
    # see gh-17007 and gh-17125
 | 
			
		||||
    #
 | 
			
		||||
    # For completeness.
 | 
			
		||||
    ser = Series(["12345678901234567890", "1234567890", "ITEM"])
 | 
			
		||||
 | 
			
		||||
    if isinstance(exp, str):
 | 
			
		||||
        with pytest.raises(ValueError, match=exp):
 | 
			
		||||
            to_numeric(ser, errors=errors)
 | 
			
		||||
    else:
 | 
			
		||||
        result = to_numeric(ser, errors=errors)
 | 
			
		||||
        tm.assert_series_equal(result, ser)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"])
 | 
			
		||||
@pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"])
 | 
			
		||||
def test_downcast_empty(dc1, dc2):
 | 
			
		||||
    # GH32493
 | 
			
		||||
 | 
			
		||||
    tm.assert_numpy_array_equal(
 | 
			
		||||
        to_numeric([], downcast=dc1),
 | 
			
		||||
        to_numeric([], downcast=dc2),
 | 
			
		||||
        check_dtype=False,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_failure_to_convert_uint64_string_to_NaN():
 | 
			
		||||
    # GH 32394
 | 
			
		||||
    result = to_numeric("uint64", errors="coerce")
 | 
			
		||||
    assert np.isnan(result)
 | 
			
		||||
 | 
			
		||||
    ser = Series([32, 64, np.nan])
 | 
			
		||||
    result = to_numeric(Series(["32", "64", "uint64"]), errors="coerce")
 | 
			
		||||
    tm.assert_series_equal(result, ser)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "strrep",
 | 
			
		||||
    [
 | 
			
		||||
        "243.164",
 | 
			
		||||
        "245.968",
 | 
			
		||||
        "249.585",
 | 
			
		||||
        "259.745",
 | 
			
		||||
        "265.742",
 | 
			
		||||
        "272.567",
 | 
			
		||||
        "279.196",
 | 
			
		||||
        "280.366",
 | 
			
		||||
        "275.034",
 | 
			
		||||
        "271.351",
 | 
			
		||||
        "272.889",
 | 
			
		||||
        "270.627",
 | 
			
		||||
        "280.828",
 | 
			
		||||
        "290.383",
 | 
			
		||||
        "308.153",
 | 
			
		||||
        "319.945",
 | 
			
		||||
        "336.0",
 | 
			
		||||
        "344.09",
 | 
			
		||||
        "351.385",
 | 
			
		||||
        "356.178",
 | 
			
		||||
        "359.82",
 | 
			
		||||
        "361.03",
 | 
			
		||||
        "367.701",
 | 
			
		||||
        "380.812",
 | 
			
		||||
        "387.98",
 | 
			
		||||
        "391.749",
 | 
			
		||||
        "391.171",
 | 
			
		||||
        "385.97",
 | 
			
		||||
        "385.345",
 | 
			
		||||
        "386.121",
 | 
			
		||||
        "390.996",
 | 
			
		||||
        "399.734",
 | 
			
		||||
        "413.073",
 | 
			
		||||
        "421.532",
 | 
			
		||||
        "430.221",
 | 
			
		||||
        "437.092",
 | 
			
		||||
        "439.746",
 | 
			
		||||
        "446.01",
 | 
			
		||||
        "451.191",
 | 
			
		||||
        "460.463",
 | 
			
		||||
        "469.779",
 | 
			
		||||
        "472.025",
 | 
			
		||||
        "479.49",
 | 
			
		||||
        "474.864",
 | 
			
		||||
        "467.54",
 | 
			
		||||
        "471.978",
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_precision_float_conversion(strrep):
 | 
			
		||||
    # GH 31364
 | 
			
		||||
    result = to_numeric(strrep)
 | 
			
		||||
 | 
			
		||||
    assert result == float(strrep)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "values, expected",
 | 
			
		||||
    [
 | 
			
		||||
        (["1", "2", None], Series([1, 2, np.nan])),
 | 
			
		||||
        (["1", "2", "3"], Series([1, 2, 3])),
 | 
			
		||||
        (["1", "2", 3], Series([1, 2, 3])),
 | 
			
		||||
        (["1", "2", 3.5], Series([1, 2, 3.5])),
 | 
			
		||||
        (["1", None, 3.5], Series([1, np.nan, 3.5])),
 | 
			
		||||
        (["1", "2", "3.5"], Series([1, 2, 3.5])),
 | 
			
		||||
    ],
 | 
			
		||||
)
 | 
			
		||||
def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected):
 | 
			
		||||
    # https://github.com/pandas-dev/pandas/issues/37262
 | 
			
		||||
    s = Series(values, dtype=nullable_string_dtype)
 | 
			
		||||
    result = to_numeric(s)
 | 
			
		||||
    tm.assert_series_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
 | 
			
		||||
    "data, input_dtype, downcast, expected_dtype",
 | 
			
		||||
    (
 | 
			
		||||
        ([1, 1], "Int64", "integer", "Int8"),
 | 
			
		||||
        ([1.0, pd.NA], "Float64", "integer", "Int8"),
 | 
			
		||||
        ([1.0, 1.1], "Float64", "integer", "Float64"),
 | 
			
		||||
        ([1, pd.NA], "Int64", "integer", "Int8"),
 | 
			
		||||
        ([450, 300], "Int64", "integer", "Int16"),
 | 
			
		||||
        ([1, 1], "Float64", "integer", "Int8"),
 | 
			
		||||
        ([np.iinfo(np.int64).max - 1, 1], "Int64", "integer", "Int64"),
 | 
			
		||||
        ([1, 1], "Int64", "signed", "Int8"),
 | 
			
		||||
        ([1.0, 1.0], "Float32", "signed", "Int8"),
 | 
			
		||||
        ([1.0, 1.1], "Float64", "signed", "Float64"),
 | 
			
		||||
        ([1, pd.NA], "Int64", "signed", "Int8"),
 | 
			
		||||
        ([450, -300], "Int64", "signed", "Int16"),
 | 
			
		||||
        pytest.param(
 | 
			
		||||
            [np.iinfo(np.uint64).max - 1, 1],
 | 
			
		||||
            "UInt64",
 | 
			
		||||
            "signed",
 | 
			
		||||
            "UInt64",
 | 
			
		||||
            marks=pytest.mark.xfail(not is_platform_arm(), reason="GH38798"),
 | 
			
		||||
        ),
 | 
			
		||||
        ([1, 1], "Int64", "unsigned", "UInt8"),
 | 
			
		||||
        ([1.0, 1.0], "Float32", "unsigned", "UInt8"),
 | 
			
		||||
        ([1.0, 1.1], "Float64", "unsigned", "Float64"),
 | 
			
		||||
        ([1, pd.NA], "Int64", "unsigned", "UInt8"),
 | 
			
		||||
        ([450, -300], "Int64", "unsigned", "Int64"),
 | 
			
		||||
        ([-1, -1], "Int32", "unsigned", "Int32"),
 | 
			
		||||
        ([1, 1], "Float64", "float", "Float32"),
 | 
			
		||||
        ([1, 1.1], "Float64", "float", "Float32"),
 | 
			
		||||
    ),
 | 
			
		||||
)
 | 
			
		||||
def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):
 | 
			
		||||
    arr = pd.array(data, dtype=input_dtype)
 | 
			
		||||
    result = to_numeric(arr, downcast=downcast)
 | 
			
		||||
    expected = pd.array(data, dtype=expected_dtype)
 | 
			
		||||
    tm.assert_extension_array_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_downcast_nullable_mask_is_copied():
 | 
			
		||||
    # GH38974
 | 
			
		||||
 | 
			
		||||
    arr = pd.array([1, 2, pd.NA], dtype="Int64")
 | 
			
		||||
 | 
			
		||||
    result = to_numeric(arr, downcast="integer")
 | 
			
		||||
    expected = pd.array([1, 2, pd.NA], dtype="Int8")
 | 
			
		||||
    tm.assert_extension_array_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
    arr[1] = pd.NA  # should not modify result
 | 
			
		||||
    tm.assert_extension_array_equal(result, expected)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_to_numeric_scientific_notation():
 | 
			
		||||
    # GH 15898
 | 
			
		||||
    result = to_numeric("1.7e+308")
 | 
			
		||||
    expected = np.float64(1.7e308)
 | 
			
		||||
    assert result == expected
 | 
			
		||||
		Reference in New Issue
	
	Block a user