160 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			160 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import string
 | 
						|
 | 
						|
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
import pandas.util._test_decorators as td
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
import pandas._testing as tm
 | 
						|
from pandas.core.arrays.sparse import (
 | 
						|
    SparseArray,
 | 
						|
    SparseDtype,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
class TestSeriesAccessor:
 | 
						|
    # TODO: collect other Series accessor tests
 | 
						|
    def test_to_dense(self):
 | 
						|
        s = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]")
 | 
						|
        result = s.sparse.to_dense()
 | 
						|
        expected = pd.Series([0, 1, 0, 10])
 | 
						|
        tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
class TestFrameAccessor:
 | 
						|
    def test_accessor_raises(self):
 | 
						|
        df = pd.DataFrame({"A": [0, 1]})
 | 
						|
        with pytest.raises(AttributeError, match="sparse"):
 | 
						|
            df.sparse
 | 
						|
 | 
						|
    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
 | 
						|
    @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])])
 | 
						|
    @pytest.mark.parametrize("dtype", ["float64", "int64"])
 | 
						|
    @td.skip_if_no_scipy
 | 
						|
    def test_from_spmatrix(self, format, labels, dtype):
 | 
						|
        import scipy.sparse
 | 
						|
 | 
						|
        sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item())
 | 
						|
 | 
						|
        mat = scipy.sparse.eye(10, format=format, dtype=dtype)
 | 
						|
        result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels)
 | 
						|
        expected = pd.DataFrame(
 | 
						|
            np.eye(10, dtype=dtype), index=labels, columns=labels
 | 
						|
        ).astype(sp_dtype)
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("format", ["csc", "csr", "coo"])
 | 
						|
    @td.skip_if_no_scipy
 | 
						|
    def test_from_spmatrix_including_explicit_zero(self, format):
 | 
						|
        import scipy.sparse
 | 
						|
 | 
						|
        mat = scipy.sparse.random(10, 2, density=0.5, format=format)
 | 
						|
        mat.data[0] = 0
 | 
						|
        result = pd.DataFrame.sparse.from_spmatrix(mat)
 | 
						|
        dtype = SparseDtype("float64", 0.0)
 | 
						|
        expected = pd.DataFrame(mat.todense()).astype(dtype)
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "columns",
 | 
						|
        [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]],
 | 
						|
    )
 | 
						|
    @td.skip_if_no_scipy
 | 
						|
    def test_from_spmatrix_columns(self, columns):
 | 
						|
        import scipy.sparse
 | 
						|
 | 
						|
        dtype = SparseDtype("float64", 0.0)
 | 
						|
 | 
						|
        mat = scipy.sparse.random(10, 2, density=0.5)
 | 
						|
        result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns)
 | 
						|
        expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype)
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
 | 
						|
        "colnames", [("A", "B"), (1, 2), (1, pd.NA), (0.1, 0.2), ("x", "x"), (0, 0)]
 | 
						|
    )
 | 
						|
    @td.skip_if_no_scipy
 | 
						|
    def test_to_coo(self, colnames):
 | 
						|
        import scipy.sparse
 | 
						|
 | 
						|
        df = pd.DataFrame(
 | 
						|
            {colnames[0]: [0, 1, 0], colnames[1]: [1, 0, 0]}, dtype="Sparse[int64, 0]"
 | 
						|
        )
 | 
						|
        result = df.sparse.to_coo()
 | 
						|
        expected = scipy.sparse.coo_matrix(np.asarray(df))
 | 
						|
        assert (result != expected).nnz == 0
 | 
						|
 | 
						|
    @pytest.mark.parametrize("fill_value", [1, np.nan])
 | 
						|
    @td.skip_if_no_scipy
 | 
						|
    def test_to_coo_nonzero_fill_val_raises(self, fill_value):
 | 
						|
        df = pd.DataFrame(
 | 
						|
            {
 | 
						|
                "A": SparseArray(
 | 
						|
                    [fill_value, fill_value, fill_value, 2], fill_value=fill_value
 | 
						|
                ),
 | 
						|
                "B": SparseArray(
 | 
						|
                    [fill_value, 2, fill_value, fill_value], fill_value=fill_value
 | 
						|
                ),
 | 
						|
            }
 | 
						|
        )
 | 
						|
        with pytest.raises(ValueError, match="fill value must be 0"):
 | 
						|
            df.sparse.to_coo()
 | 
						|
 | 
						|
    def test_to_dense(self):
 | 
						|
        df = pd.DataFrame(
 | 
						|
            {
 | 
						|
                "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)),
 | 
						|
                "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)),
 | 
						|
                "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)),
 | 
						|
            },
 | 
						|
            index=["b", "a"],
 | 
						|
        )
 | 
						|
        result = df.sparse.to_dense()
 | 
						|
        expected = pd.DataFrame(
 | 
						|
            {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"]
 | 
						|
        )
 | 
						|
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_density(self):
 | 
						|
        df = pd.DataFrame(
 | 
						|
            {
 | 
						|
                "A": SparseArray([1, 0, 2, 1], fill_value=0),
 | 
						|
                "B": SparseArray([0, 1, 1, 1], fill_value=0),
 | 
						|
            }
 | 
						|
        )
 | 
						|
        res = df.sparse.density
 | 
						|
        expected = 0.75
 | 
						|
        assert res == expected
 | 
						|
 | 
						|
    @pytest.mark.parametrize("dtype", ["int64", "float64"])
 | 
						|
    @pytest.mark.parametrize("dense_index", [True, False])
 | 
						|
    @td.skip_if_no_scipy
 | 
						|
    def test_series_from_coo(self, dtype, dense_index):
 | 
						|
        import scipy.sparse
 | 
						|
 | 
						|
        A = scipy.sparse.eye(3, format="coo", dtype=dtype)
 | 
						|
        result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
 | 
						|
        index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
 | 
						|
        expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)
 | 
						|
        if dense_index:
 | 
						|
            expected = expected.reindex(pd.MultiIndex.from_product(index.levels))
 | 
						|
 | 
						|
        tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
    @td.skip_if_no_scipy
 | 
						|
    def test_series_from_coo_incorrect_format_raises(self):
 | 
						|
        # gh-26554
 | 
						|
        import scipy.sparse
 | 
						|
 | 
						|
        m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]]))
 | 
						|
        with pytest.raises(
 | 
						|
            TypeError, match="Expected coo_matrix. Got csr_matrix instead."
 | 
						|
        ):
 | 
						|
            pd.Series.sparse.from_coo(m)
 | 
						|
 | 
						|
    def test_with_column_named_sparse(self):
 | 
						|
        # https://github.com/pandas-dev/pandas/issues/30758
 | 
						|
        df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])})
 | 
						|
        assert isinstance(df.sparse, pd.core.arrays.sparse.accessor.SparseFrameAccessor)
 |