119 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			119 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from datetime import timedelta
 | 
						|
 | 
						|
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas._libs import iNaT
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
from pandas import (
 | 
						|
    Categorical,
 | 
						|
    Index,
 | 
						|
    NaT,
 | 
						|
    Series,
 | 
						|
    isna,
 | 
						|
)
 | 
						|
import pandas._testing as tm
 | 
						|
 | 
						|
 | 
						|
class TestSeriesMissingData:
 | 
						|
    def test_categorical_nan_handling(self):
 | 
						|
 | 
						|
        # NaNs are represented as -1 in labels
 | 
						|
        s = Series(Categorical(["a", "b", np.nan, "a"]))
 | 
						|
        tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
 | 
						|
        tm.assert_numpy_array_equal(
 | 
						|
            s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8)
 | 
						|
        )
 | 
						|
 | 
						|
    def test_isna_for_inf(self):
 | 
						|
        s = Series(["a", np.inf, np.nan, pd.NA, 1.0])
 | 
						|
        with pd.option_context("mode.use_inf_as_na", True):
 | 
						|
            r = s.isna()
 | 
						|
            dr = s.dropna()
 | 
						|
        e = Series([False, True, True, True, False])
 | 
						|
        de = Series(["a", 1.0], index=[0, 4])
 | 
						|
        tm.assert_series_equal(r, e)
 | 
						|
        tm.assert_series_equal(dr, de)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "method, expected",
 | 
						|
        [
 | 
						|
            ["isna", Series([False, True, True, False])],
 | 
						|
            ["dropna", Series(["a", 1.0], index=[0, 3])],
 | 
						|
        ],
 | 
						|
    )
 | 
						|
    def test_isnull_for_inf_deprecated(self, method, expected):
 | 
						|
        # gh-17115
 | 
						|
        s = Series(["a", np.inf, np.nan, 1.0])
 | 
						|
        with pd.option_context("mode.use_inf_as_null", True):
 | 
						|
            result = getattr(s, method)()
 | 
						|
        tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    def test_timedelta64_nan(self):
 | 
						|
 | 
						|
        td = Series([timedelta(days=i) for i in range(10)])
 | 
						|
 | 
						|
        # nan ops on timedeltas
 | 
						|
        td1 = td.copy()
 | 
						|
        td1[0] = np.nan
 | 
						|
        assert isna(td1[0])
 | 
						|
        assert td1[0].value == iNaT
 | 
						|
        td1[0] = td[0]
 | 
						|
        assert not isna(td1[0])
 | 
						|
 | 
						|
        # GH#16674 iNaT is treated as an integer when given by the user
 | 
						|
        td1[1] = iNaT
 | 
						|
        assert not isna(td1[1])
 | 
						|
        assert td1.dtype == np.object_
 | 
						|
        assert td1[1] == iNaT
 | 
						|
        td1[1] = td[1]
 | 
						|
        assert not isna(td1[1])
 | 
						|
 | 
						|
        td1[2] = NaT
 | 
						|
        assert isna(td1[2])
 | 
						|
        assert td1[2].value == iNaT
 | 
						|
        td1[2] = td[2]
 | 
						|
        assert not isna(td1[2])
 | 
						|
 | 
						|
        # boolean setting
 | 
						|
        # GH#2899 boolean setting
 | 
						|
        td3 = np.timedelta64(timedelta(days=3))
 | 
						|
        td7 = np.timedelta64(timedelta(days=7))
 | 
						|
        td[(td > td3) & (td < td7)] = np.nan
 | 
						|
        assert isna(td).sum() == 3
 | 
						|
 | 
						|
    @pytest.mark.xfail(
 | 
						|
        reason="Chained inequality raises when trying to define 'selector'"
 | 
						|
    )
 | 
						|
    def test_logical_range_select(self, datetime_series):
 | 
						|
        # NumPy limitation =(
 | 
						|
        # https://github.com/pandas-dev/pandas/commit/9030dc021f07c76809848925cb34828f6c8484f3
 | 
						|
        np.random.seed(12345)
 | 
						|
        selector = -0.5 <= datetime_series <= 0.5
 | 
						|
        expected = (datetime_series >= -0.5) & (datetime_series <= 0.5)
 | 
						|
        tm.assert_series_equal(selector, expected)
 | 
						|
 | 
						|
    def test_valid(self, datetime_series):
 | 
						|
        ts = datetime_series.copy()
 | 
						|
        ts.index = ts.index._with_freq(None)
 | 
						|
        ts[::2] = np.NaN
 | 
						|
 | 
						|
        result = ts.dropna()
 | 
						|
        assert len(result) == ts.count()
 | 
						|
        tm.assert_series_equal(result, ts[1::2])
 | 
						|
        tm.assert_series_equal(result, ts[pd.notna(ts)])
 | 
						|
 | 
						|
 | 
						|
def test_hasnans_uncached_for_series():
    """``hasnans`` is cached on an Index but recomputed on a Series."""
    # GH#19700
    # Start from float64 so that writing NaN below needs no dtype upcast.
    idx = Index([0, 1], dtype="float64")
    assert idx.hasnans is False
    assert "hasnans" in idx._cache
    ser = idx.to_series()
    assert ser.hasnans is False
    assert not hasattr(ser, "_cache")
    # If the Series had cached the earlier False, this mutation would go
    # unnoticed by the second ``hasnans`` read.
    ser.iloc[-1] = np.nan
    assert ser.hasnans is True
    # The Series property must still expose a docstring; its exact wording is
    # not compared so the test stays about caching, not documentation text.
    assert Series.hasnans.__doc__
 |