针对pulse-transit的工具
This commit is contained in:
299
dist/client/pandas/tests/io/parser/common/test_index.py
vendored
Normal file
299
dist/client/pandas/tests/io/parser/common/test_index.py
vendored
Normal file
@@ -0,0 +1,299 @@
|
||||
"""
|
||||
Tests that work on both the Python and C engines but do not have a
|
||||
specific classification into the other test modules.
|
||||
"""
|
||||
from datetime import datetime
|
||||
from io import StringIO
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
|
||||
|
||||
# GH#43650: Some expected failures with the pyarrow engine can occasionally
|
||||
# cause a deadlock instead, so we skip these instead of xfailing
|
||||
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,kwargs,expected",
|
||||
[
|
||||
(
|
||||
"""foo,2,3,4,5
|
||||
bar,7,8,9,10
|
||||
baz,12,13,14,15
|
||||
qux,12,13,14,15
|
||||
foo2,12,13,14,15
|
||||
bar2,12,13,14,15
|
||||
""",
|
||||
{"index_col": 0, "names": ["index", "A", "B", "C", "D"]},
|
||||
DataFrame(
|
||||
[
|
||||
[2, 3, 4, 5],
|
||||
[7, 8, 9, 10],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
],
|
||||
index=Index(["foo", "bar", "baz", "qux", "foo2", "bar2"], name="index"),
|
||||
columns=["A", "B", "C", "D"],
|
||||
),
|
||||
),
|
||||
(
|
||||
"""foo,one,2,3,4,5
|
||||
foo,two,7,8,9,10
|
||||
foo,three,12,13,14,15
|
||||
bar,one,12,13,14,15
|
||||
bar,two,12,13,14,15
|
||||
""",
|
||||
{"index_col": [0, 1], "names": ["index1", "index2", "A", "B", "C", "D"]},
|
||||
DataFrame(
|
||||
[
|
||||
[2, 3, 4, 5],
|
||||
[7, 8, 9, 10],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
],
|
||||
index=MultiIndex.from_tuples(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("foo", "three"),
|
||||
("bar", "one"),
|
||||
("bar", "two"),
|
||||
],
|
||||
names=["index1", "index2"],
|
||||
),
|
||||
columns=["A", "B", "C", "D"],
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_pass_names_with_index(all_parsers, data, kwargs, expected):
|
||||
parser = all_parsers
|
||||
result = parser.read_csv(StringIO(data), **kwargs)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
|
||||
def test_multi_index_no_level_names(all_parsers, index_col):
|
||||
data = """index1,index2,A,B,C,D
|
||||
foo,one,2,3,4,5
|
||||
foo,two,7,8,9,10
|
||||
foo,three,12,13,14,15
|
||||
bar,one,12,13,14,15
|
||||
bar,two,12,13,14,15
|
||||
"""
|
||||
headless_data = "\n".join(data.split("\n")[1:])
|
||||
|
||||
names = ["A", "B", "C", "D"]
|
||||
parser = all_parsers
|
||||
|
||||
result = parser.read_csv(
|
||||
StringIO(headless_data), index_col=index_col, header=None, names=names
|
||||
)
|
||||
expected = parser.read_csv(StringIO(data), index_col=index_col)
|
||||
|
||||
# No index names in headless data.
|
||||
expected.index.names = [None] * 2
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@xfail_pyarrow
|
||||
def test_multi_index_no_level_names_implicit(all_parsers):
|
||||
parser = all_parsers
|
||||
data = """A,B,C,D
|
||||
foo,one,2,3,4,5
|
||||
foo,two,7,8,9,10
|
||||
foo,three,12,13,14,15
|
||||
bar,one,12,13,14,15
|
||||
bar,two,12,13,14,15
|
||||
"""
|
||||
|
||||
result = parser.read_csv(StringIO(data))
|
||||
expected = DataFrame(
|
||||
[
|
||||
[2, 3, 4, 5],
|
||||
[7, 8, 9, 10],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
],
|
||||
columns=["A", "B", "C", "D"],
|
||||
index=MultiIndex.from_tuples(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("foo", "three"),
|
||||
("bar", "one"),
|
||||
("bar", "two"),
|
||||
]
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@xfail_pyarrow
|
||||
@pytest.mark.parametrize(
|
||||
"data,expected,header",
|
||||
[
|
||||
("a,b", DataFrame(columns=["a", "b"]), [0]),
|
||||
(
|
||||
"a,b\nc,d",
|
||||
DataFrame(columns=MultiIndex.from_tuples([("a", "c"), ("b", "d")])),
|
||||
[0, 1],
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("round_trip", [True, False])
|
||||
def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
|
||||
# see gh-14545
|
||||
parser = all_parsers
|
||||
data = expected.to_csv(index=False) if round_trip else data
|
||||
|
||||
result = parser.read_csv(StringIO(data), header=header)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@xfail_pyarrow
|
||||
def test_no_unnamed_index(all_parsers):
|
||||
parser = all_parsers
|
||||
data = """ id c0 c1 c2
|
||||
0 1 0 a b
|
||||
1 2 0 c d
|
||||
2 2 2 e f
|
||||
"""
|
||||
result = parser.read_csv(StringIO(data), sep=" ")
|
||||
expected = DataFrame(
|
||||
[[0, 1, 0, "a", "b"], [1, 2, 0, "c", "d"], [2, 2, 2, "e", "f"]],
|
||||
columns=["Unnamed: 0", "id", "c0", "c1", "c2"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_read_duplicate_index_explicit(all_parsers):
|
||||
data = """index,A,B,C,D
|
||||
foo,2,3,4,5
|
||||
bar,7,8,9,10
|
||||
baz,12,13,14,15
|
||||
qux,12,13,14,15
|
||||
foo,12,13,14,15
|
||||
bar,12,13,14,15
|
||||
"""
|
||||
parser = all_parsers
|
||||
result = parser.read_csv(StringIO(data), index_col=0)
|
||||
|
||||
expected = DataFrame(
|
||||
[
|
||||
[2, 3, 4, 5],
|
||||
[7, 8, 9, 10],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
],
|
||||
columns=["A", "B", "C", "D"],
|
||||
index=Index(["foo", "bar", "baz", "qux", "foo", "bar"], name="index"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@xfail_pyarrow
|
||||
def test_read_duplicate_index_implicit(all_parsers):
|
||||
data = """A,B,C,D
|
||||
foo,2,3,4,5
|
||||
bar,7,8,9,10
|
||||
baz,12,13,14,15
|
||||
qux,12,13,14,15
|
||||
foo,12,13,14,15
|
||||
bar,12,13,14,15
|
||||
"""
|
||||
parser = all_parsers
|
||||
result = parser.read_csv(StringIO(data))
|
||||
|
||||
expected = DataFrame(
|
||||
[
|
||||
[2, 3, 4, 5],
|
||||
[7, 8, 9, 10],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
[12, 13, 14, 15],
|
||||
],
|
||||
columns=["A", "B", "C", "D"],
|
||||
index=Index(["foo", "bar", "baz", "qux", "foo", "bar"]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@xfail_pyarrow
|
||||
def test_read_csv_no_index_name(all_parsers, csv_dir_path):
|
||||
parser = all_parsers
|
||||
csv2 = os.path.join(csv_dir_path, "test2.csv")
|
||||
result = parser.read_csv(csv2, index_col=0, parse_dates=True)
|
||||
|
||||
expected = DataFrame(
|
||||
[
|
||||
[0.980269, 3.685731, -0.364216805298, -1.159738, "foo"],
|
||||
[1.047916, -0.041232, -0.16181208307, 0.212549, "bar"],
|
||||
[0.498581, 0.731168, -0.537677223318, 1.346270, "baz"],
|
||||
[1.120202, 1.567621, 0.00364077397681, 0.675253, "qux"],
|
||||
[-0.487094, 0.571455, -1.6116394093, 0.103469, "foo2"],
|
||||
],
|
||||
columns=["A", "B", "C", "D", "E"],
|
||||
index=Index(
|
||||
[
|
||||
datetime(2000, 1, 3),
|
||||
datetime(2000, 1, 4),
|
||||
datetime(2000, 1, 5),
|
||||
datetime(2000, 1, 6),
|
||||
datetime(2000, 1, 7),
|
||||
]
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@xfail_pyarrow
|
||||
def test_empty_with_index(all_parsers):
|
||||
# see gh-10184
|
||||
data = "x,y"
|
||||
parser = all_parsers
|
||||
result = parser.read_csv(StringIO(data), index_col=0)
|
||||
|
||||
expected = DataFrame(columns=["y"], index=Index([], name="x"))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@skip_pyarrow
|
||||
def test_empty_with_multi_index(all_parsers):
|
||||
# see gh-10467
|
||||
data = "x,y,z"
|
||||
parser = all_parsers
|
||||
result = parser.read_csv(StringIO(data), index_col=["x", "y"])
|
||||
|
||||
expected = DataFrame(
|
||||
columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@skip_pyarrow
|
||||
def test_empty_with_reversed_multi_index(all_parsers):
|
||||
data = "x,y,z"
|
||||
parser = all_parsers
|
||||
result = parser.read_csv(StringIO(data), index_col=[1, 0])
|
||||
|
||||
expected = DataFrame(
|
||||
columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
Reference in New Issue
Block a user