117 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			117 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
import pandas._testing as tm
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
def test_compare_axis(align_axis):
    # GH#30429
    # Series.compare is exercised with both the integer and the string
    # spelling of each axis; the expected container depends on the axis.
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, align_axis=align_axis)

    if align_axis not in (1, "columns"):
        # Row-wise alignment (0 / "index"): the differing values are expected
        # stacked in a Series, with an inner index level of "self"/"other".
        stacked_index = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
        expected = pd.Series(["a", "x", "c", "z"], index=stacked_index)
        tm.assert_series_equal(result, expected)
        return

    # Column-wise alignment (1 / "columns"): one row per differing label,
    # with "self" and "other" as columns.
    expected = pd.DataFrame(
        [["a", "x"], ["c", "z"]],
        index=pd.Index([0, 2]),
        columns=pd.Index(["self", "other"]),
    )
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
    "keep_shape, keep_equal",
    [
        (True, False),
        (False, True),
        (True, True),
        # False, False case is already covered in test_compare_axis
    ],
)
def test_compare_various_formats(keep_shape, keep_equal):
    # Checks the frame returned by Series.compare for each combination of
    # the keep_shape / keep_equal flags listed above.
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, keep_shape=keep_shape, keep_equal=keep_equal)

    if keep_shape:
        # All three labels are expected; the row where both sides hold "b"
        # shows the shared value when keep_equal is set and NaN otherwise.
        index = pd.Index([0, 1, 2])
        matching_row = ["b", "b"] if keep_equal else [np.nan, np.nan]
        rows = [["a", "x"], matching_row, ["c", "z"]]
    else:
        # Without keep_shape only the differing labels (0 and 2) are expected.
        index = pd.Index([0, 2])
        rows = [["a", "x"], ["c", "z"]]

    expected = pd.DataFrame(rows, index=index, columns=pd.Index(["self", "other"]))
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_compare_with_equal_nulls():
    # We want to make sure two NaNs are considered the same
    # and dropped where applicable: both inputs end in NaN, so only the
    # first row ("a" vs "x") is expected in the comparison result.
    left = pd.Series(["a", "b", np.nan])
    right = pd.Series(["x", "b", np.nan])

    expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])

    result = left.compare(right)
    tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_compare_with_non_equal_nulls():
    # We want to make sure the relevant NaNs do not get dropped:
    # a NaN on one side facing "c" on the other is a real difference and
    # is expected to appear in the stacked (align_axis=0) result.
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", np.nan])

    result = left.compare(right, align_axis=0)

    stacked_index = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
    expected = pd.Series(["a", "x", "c", np.nan], index=stacked_index)
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_compare_multi_index():
    # Both Series share a two-level MultiIndex. With align_axis=0 the
    # expected result keeps those two levels for the differing entries and
    # adds a third level holding the "self"/"other" labels.
    shared_index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
    left = pd.Series(["a", "b", "c"], index=shared_index)
    right = pd.Series(["x", "b", "z"], index=shared_index)

    result = left.compare(right, align_axis=0)

    outer_level = [0, 0, 1, 1]
    inner_level = [0, 0, 2, 2]
    side_level = ["self", "other", "self", "other"]
    expected_index = pd.MultiIndex.from_arrays(
        [outer_level, inner_level, side_level]
    )
    expected = pd.Series(["a", "x", "c", "z"], index=expected_index)
    tm.assert_series_equal(result, expected)
 | 
						|
 | 
						|
 | 
						|
def test_compare_unaligned_objects():
    # Series.compare is expected to raise ValueError with this message when
    # the two operands are not identically labeled.
    msg = "Can only compare identically-labeled Series objects"

    # test Series with different indices
    ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
    ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
    # Only the compare call lives inside pytest.raises, so an error raised
    # while building the inputs cannot satisfy the assertion by accident.
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)

    # test Series with different lengths
    ser1 = pd.Series([1, 2, 3])
    ser2 = pd.Series([1, 2, 3, 4])
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)
 |