285 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			285 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas import (
 | 
						|
    DataFrame,
 | 
						|
    NaT,
 | 
						|
    date_range,
 | 
						|
)
 | 
						|
import pandas._testing as tm
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def float_frame_with_na():
    """
    Fixture for a float DataFrame that contains blocks of missing values.

    The frame is built from ``tm.getSeriesData()``; columns are
    ['A', 'B', 'C', 'D'] and the index holds unique strings.
    Rows 5-9 are NaN in every column and rows 15-19 are NaN in the
    last two columns.

    Illustrative output:

                       A         B         C         D
    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
    neMgPD5UBF  0.486072 -1.027393 -0.031553  1.449522
    0yWA4n8VeX -1.937191 -1.142531  0.805215 -0.462018
    3slYUbbqU1  0.153260  1.164691  1.489795 -0.545826
    soujjZ0A08       NaN       NaN       NaN       NaN
    7W6NLGsjB9       NaN       NaN       NaN       NaN
    ...              ...       ...       ...       ...
    uhfeaNkCR1 -0.231210 -0.340472  0.244717 -0.901590
    n6p7GYuBIV -0.419052  1.922721 -0.125361 -0.727717
    ZhzAeY6p1y  1.234374 -1.425359 -0.827038 -0.633189
    uWdPsORyUh  0.046738 -0.980445 -1.102965  0.605503
    3DJA6aN590 -0.091018 -1.684734 -1.100900  0.215947
    2GBPAzdbMk -2.883405 -1.021071  1.209877  1.633083
    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())

    # Positional regions that get blanked out: five complete rows, then
    # five rows restricted to the two right-most columns.
    full_row_gap = slice(5, 10)
    trailing_columns_gap = (slice(15, 20), slice(-2, None))

    frame.iloc[full_row_gap] = np.nan
    frame.iloc[trailing_columns_gap] = np.nan
    return frame
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def bool_frame_with_na():
    """
    Fixture for an object-dtype DataFrame of booleans with missing values.

    The frame is ``tm.getSeriesData() > 0`` cast to object; columns are
    ['A', 'B', 'C', 'D'] and the index holds unique strings.
    Rows 5-9 are NaN in every column and rows 15-19 are NaN in the
    last two columns. The first four diagonal entries are forced to True.

    Illustrative output:

                    A      B      C      D
    zBZxY2IDGd  False  False  False  False
    IhBWBMWllt  False   True   True   True
    ctjdvZSR6R   True  False   True   True
    AVTujptmxb  False   True  False   True
    G9lrImrSWq  False  False  False   True
    sFFwdIUfz2    NaN    NaN    NaN    NaN
    s15ptEJnRb    NaN    NaN    NaN    NaN
    ...           ...    ...    ...    ...
    UW41KkDyZ4   True   True  False  False
    l9l6XkOdqV   True  False  False  False
    X2MeZfzDYA  False   True  False  False
    xWkIKU7vfX  False   True  False   True
    QOhL6VmpGU  False  False  False   True
    22PwkRJdat  False   True  False  False
    kfboQ3VeIK   True  False   True  False

    [30 rows x 4 columns]
    """
    # Object dtype lets the boolean columns hold NaN next to True/False.
    frame = (DataFrame(tm.getSeriesData()) > 0).astype(object)

    # Punch the missing-value holes.
    frame.iloc[5:10] = np.nan
    frame.iloc[15:20, -2:] = np.nan

    # The `any` tests require every column to contain at least one True
    # ahead of its first NaN, so set the leading diagonal explicitly.
    for pos in range(4):
        frame.iloc[pos, pos] = True
    return frame
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def float_string_frame():
    """
    Fixture for a DataFrame mixing float columns with one string column.

    The float columns ['A', 'B', 'C', 'D'] come from ``tm.getSeriesData()``
    (index of unique strings); an extra column 'foo' holds the constant
    string "bar" in every row.

    Illustrative output:

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
    3WSJ7BUCGd  2.484964  0.213829  0.034778 -2.327831  bar
    khdAmufk0U -0.193480 -0.743518 -0.077987  0.153646  bar
    LE2DZiFlrE -0.193566 -1.343194 -0.107321  0.959978  bar
    HJXSJhVn7b  0.142590  1.257603 -0.659409 -0.223844  bar
    ...              ...       ...       ...       ...  ...
    9a1Vypttgw -1.316394  1.601354  0.173596  1.213196  bar
    h5d1gVFbEy  0.609475  1.106738 -0.155271  0.294630  bar
    mK9LsTQG92  1.303613  0.857040 -1.019153  0.369468  bar
    oOLksd9gKH  0.558219 -0.134491 -0.289869 -0.951033  bar
    9jgoOjKyHg  0.058270 -0.496110 -0.413212 -0.852659  bar
    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    floats = DataFrame(tm.getSeriesData())
    # Append the constant string column after the float columns.
    return floats.assign(foo="bar")
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def mixed_float_frame():
    """
    Fixture for a DataFrame whose columns use different float dtypes.

    Data comes from ``tm.getSeriesData()`` (index of unique strings).
    Columns are ['A', 'B', 'C', 'D'] with dtypes float32, float32,
    float16 and float64 respectively.

    Illustrative output:

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
    ...              ...       ...       ...       ...
    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())

    # Target dtype for each column, applied one column at a time in order.
    target_dtypes = {
        "A": "float32",
        "B": "float32",
        "C": "float16",
        "D": "float64",
    }
    for label, dtype in target_dtypes.items():
        frame[label] = frame[label].astype(dtype)
    return frame
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def mixed_int_frame():
    """
    Fixture for a DataFrame whose columns use different integer dtypes.

    Data comes from ``tm.getSeriesData()`` cast to int (index of unique
    strings). Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64,
    uint8 and int64 respectively. 'B' is all ones and 'D' is a copy of the
    uint8 column 'C' widened to int64.

    Illustrative output:

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ccQkqOHX75 -1  1    0    0
    DN0iXaoDLd  0  1    0    0
    ...        .. ..  ...  ...
    Dfb141wAaQ  1  1  254  254
    IPD8eQOVu5  0  1    0    0
    CcaKulsCmv  0  1    0    0
    rIBa8gu7E5  0  1    0    0
    RP6peZmh5o  0  1    1    1
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    # Cast every source series to int before assembling the frame.
    int_columns = {}
    for label, series in tm.getSeriesData().items():
        int_columns[label] = series.astype(int)
    frame = DataFrame(int_columns)

    frame["A"] = frame["A"].astype("int32")
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # D is taken from the already-converted C, not from the original D data.
    frame["D"] = frame["C"].astype("int64")
    return frame
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def timezone_frame():
    """
    Fixture for a DataFrame of date_range columns in different time zones.

    Columns are ['A', 'B', 'C']: 'A' is timezone-naive, 'B' uses
    "US/Eastern" and 'C' uses "CET". The middle row of 'B' and 'C' is NaT.

               A                         B                         C
    0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
    1 2013-01-02                       NaT                       NaT
    2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
    """
    naive = date_range("20130101", periods=3)
    eastern = date_range("20130101", periods=3, tz="US/Eastern")
    cet = date_range("20130101", periods=3, tz="CET")
    frame = DataFrame({"A": naive, "B": eastern, "C": cet})

    # Knock out the second row of each timezone-aware column, one at a time.
    for column_pos in (1, 2):
        frame.iloc[1, column_pos] = NaT
    return frame
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def uint64_frame():
    """
    Fixture for a DataFrame constructed with dtype uint64.

    Columns are ['A', 'B']: 'A' is 0, 1, 2 and 'B' holds 2**63, 2**63 + 5
    and 2**63 + 10.
    """
    base = 2**63
    columns = {
        "A": np.arange(3),
        "B": [base, base + 5, base + 10],
    }
    return DataFrame(columns, dtype=np.uint64)
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def simple_frame():
    """
    Fixture for a small 3x3 DataFrame of floats.

    Columns are ['one', 'two', 'three'] and the index is ['a', 'b', 'c'];
    the values count from 1.0 to 9.0 row by row.

       one  two  three
    a  1.0  2.0    3.0
    b  4.0  5.0    6.0
    c  7.0  8.0    9.0
    """
    # 1.0 .. 9.0 laid out row-major in a 3x3 grid.
    values = np.arange(1.0, 10.0).reshape(3, 3)
    row_labels = list("abc")
    column_labels = ["one", "two", "three"]
    return DataFrame(values, index=row_labels, columns=column_labels)
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def frame_of_index_cols():
    """
    Fixture for a DataFrame whose columns can be used for indexing.

    Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')].
    'A' and 'B' each contain duplicates but are jointly unique; the
    remaining columns are unique. 'D', 'E' and the tuple-labelled column
    are filled from ``np.random.randn``.

         A      B  C         D         E  (tuple, as, label)
    0  foo    one  a  0.608477 -0.012500           -1.664297
    1  foo    two  b -0.633460  0.249614           -0.364411
    2  foo  three  c  0.615256  2.154968           -0.834666
    3  bar    one  d  0.234246  1.085675            0.718445
    4  bar    two  e  0.533841 -0.005702           -3.533912
    """
    data = {
        "A": ["foo", "foo", "foo", "bar", "bar"],
        "B": ["one", "two", "three", "one", "two"],
        "C": ["a", "b", "c", "d", "e"],
    }
    # Random columns are drawn in this order: D, E, then the tuple label.
    for label in ("D", "E", ("tuple", "as", "label")):
        data[label] = np.random.randn(5)
    return DataFrame(data)
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture(
    params=[
        # boolean reductions
        "any",
        "all",
        # counting and accumulation
        "count",
        "sum",
        "prod",
        # extrema
        "max",
        "min",
        # central tendency
        "mean",
        "median",
        # shape of the distribution
        "skew",
        "kurt",
        # spread
        "sem",
        "var",
        "std",
        "mad",
    ]
)
def reduction_functions(request):
    """
    Parametrized fixture that yields one reduction name per test run.

    Each value is a plain string (for example "sum" or "median") taken
    from the ``params`` list above, in the order listed.
    """
    reduction_name = request.param
    return reduction_name
 |