针对pulse-transit的工具

2025-02-22 16:12:02 +08:00
commit 6bc25b4e3a
7719 changed files with 1530886 additions and 0 deletions
--- a/dist/client/pandas/tests/frame/methods/test_explode.py
+++ b/dist/client/pandas/tests/frame/methods/test_explode.py
@@ -0,0 +1,277 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+def test_error():  # explode() must raise ValueError for malformed or duplicated column arguments
+    df = pd.DataFrame(
+        {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1}
+    )
+    with pytest.raises(
+        ValueError, match="column must be a scalar, tuple, or list thereof"
+    ):
+        df.explode([list("AA")])  # nested list [["A", "A"]] is not an accepted column spec
+
+    with pytest.raises(ValueError, match="column must be unique"):
+        df.explode(list("AA"))  # the same label is requested twice
+
+    df.columns = list("AA")  # relabel both frame columns as "A"
+    with pytest.raises(ValueError, match="columns must be unique"):
+        df.explode("A")  # the frame itself now carries duplicate column labels
+
+
+@pytest.mark.parametrize(
+    "input_subset, error_message",
+    [
+        (
+            list("AC"),
+            "columns must have matching element counts",
+        ),
+        (
+            [],
+            "column must be nonempty",
+        ),
+        (  # NOTE(review): identical to the first case above; looks redundant
+            list("AC"),
+            "columns must have matching element counts",
+        ),
+    ],
+)
+def test_error_multi_columns(input_subset, error_message):
+    # GH 39240: invalid column lists passed to explode() must raise ValueError
+    df = pd.DataFrame(
+        {
+            "A": [[0, 1, 2], np.nan, [], (3, 4)],
+            "B": 1,
+            "C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],  # row "d": 3 items vs. 2 in "A"
+        },
+        index=list("abcd"),
+    )
+    with pytest.raises(ValueError, match=error_message):
+        df.explode(input_subset)
+
+
+@pytest.mark.parametrize(
+    "scalar",
+    ["a", 0, 1.5, pd.Timedelta("1 days"), pd.Timestamp("2019-12-31")],  # label types tried as the column name
+)
+def test_basic(scalar):  # explode a column whose label is the parametrized scalar
+    df = pd.DataFrame(
+        {scalar: pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1}
+    )
+    result = df.explode(scalar)
+    expected = pd.DataFrame(
+        {
+            scalar: pd.Series(  # one row per element; NaN and [] each become a single NaN row
+                [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
+            ),
+            "B": 1,
+        }
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_multi_index_rows():  # MultiIndex row labels are repeated once per exploded element
+    df = pd.DataFrame(
+        {"A": np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), "B": 1},
+        index=pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]),
+    )
+
+    result = df.explode("A")
+    expected = pd.DataFrame(
+        {
+            "A": pd.Series(
+                [0, 1, 2, np.nan, np.nan, 3, 4],
+                index=pd.MultiIndex.from_tuples(
+                    [
+                        ("a", 1),  # three rows from [0, 1, 2]
+                        ("a", 1),
+                        ("a", 1),
+                        ("a", 2),  # NaN input stays a single row
+                        ("b", 1),  # empty list becomes a single NaN row
+                        ("b", 2),  # two rows from (3, 4)
+                        ("b", 2),
+                    ]
+                ),
+                dtype=object,
+            ),
+            "B": 1,
+        }
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_multi_index_columns():  # explode a column addressed by a tuple label
+    df = pd.DataFrame(
+        {("A", 1): np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), ("A", 2): 1}
+    )
+
+    result = df.explode(("A", 1))
+    expected = pd.DataFrame(
+        {
+            ("A", 1): pd.Series(
+                [0, 1, 2, np.nan, np.nan, 3, 4],
+                index=pd.Index([0, 0, 0, 1, 2, 3, 3]),  # source row positions, repeated per element
+                dtype=object,
+            ),
+            ("A", 2): 1,
+        }
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_usecase():
+    # explode a single column of range objects on a frame indexed by "C"
+    # gh-10511
+    df = pd.DataFrame(
+        [[11, range(5), 10], [22, range(3), 20]], columns=list("ABC")
+    ).set_index("C")
+    result = df.explode("B")
+
+    expected = pd.DataFrame(
+        {
+            "A": [11, 11, 11, 11, 11, 22, 22, 22],
+            "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object),  # exploded values are expected as object dtype
+            "C": [10, 10, 10, 10, 10, 20, 20, 20],
+        },
+        columns=list("ABC"),
+    ).set_index("C")
+
+    tm.assert_frame_equal(result, expected)
+
+    # gh-8517: split a string column on spaces, then explode to one row per token
+    df = pd.DataFrame(
+        [["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]],
+        columns=["dt", "name", "text"],
+    )
+    result = df.assign(text=df.text.str.split(" ")).explode("text")
+    expected = pd.DataFrame(
+        [
+            ["2014-01-01", "Alice", "A"],
+            ["2014-01-01", "Alice", "B"],
+            ["2014-01-02", "Bob", "C"],
+            ["2014-01-02", "Bob", "D"],
+        ],
+        columns=["dt", "name", "text"],
+        index=[0, 0, 1, 1],  # original row labels, repeated per token
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "input_dict, input_index, expected_dict, expected_index",
+    [
+        (  # plain list index with a repeated label
+            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
+            [0, 0],
+            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
+            [0, 0, 0, 0],
+        ),
+        (  # named Index: the name is expected to survive
+            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
+            pd.Index([0, 0], name="my_index"),
+            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
+            pd.Index([0, 0, 0, 0], name="my_index"),
+        ),
+        (  # MultiIndex with both level names set
+            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
+            pd.MultiIndex.from_arrays(
+                [[0, 0], [1, 1]], names=["my_first_index", "my_second_index"]
+            ),
+            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
+            pd.MultiIndex.from_arrays(
+                [[0, 0, 0, 0], [1, 1, 1, 1]],
+                names=["my_first_index", "my_second_index"],
+            ),
+        ),
+        (  # MultiIndex with only the first level named
+            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
+            pd.MultiIndex.from_arrays([[0, 0], [1, 1]], names=["my_index", None]),
+            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
+            pd.MultiIndex.from_arrays(
+                [[0, 0, 0, 0], [1, 1, 1, 1]], names=["my_index", None]
+            ),
+        ),
+    ],
+)
+def test_duplicate_index(input_dict, input_index, expected_dict, expected_index):
+    # GH 28005: explode "col1" on a frame whose index labels are all duplicates
+    df = pd.DataFrame(input_dict, index=input_index)
+    result = df.explode("col1")
+    expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)  # whole expected frame built as object dtype
+    tm.assert_frame_equal(result, expected)
+
+
+def test_ignore_index():
+    # GH 34932: ignore_index=True is expected to give the result a fresh 0..3 index
+    df = pd.DataFrame({"id": range(0, 20, 10), "values": [list("ab"), list("cd")]})
+    result = df.explode("values", ignore_index=True)
+    expected = pd.DataFrame(
+        {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_explode_sets():  # a set-valued cell is exploded into one row per member
+    # https://github.com/pandas-dev/pandas/issues/35614
+    df = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1])
+    result = df.explode(column="a").sort_values(by="a")  # sorted, presumably because set element order is arbitrary
+    expected = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1])
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "input_subset, expected_dict, expected_index",
+    [
+        (  # explode "A" and "C" together
+            list("AC"),
+            {
+                "A": pd.Series(
+                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
+                    index=list("aaabcdde"),
+                    dtype=object,
+                ),
+                "B": 1,
+                "C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan],  # scalar "foo" is kept as-is; [] becomes NaN
+            },
+            list("aaabcdde"),
+        ),
+        (  # explode only "A", passed as a one-element list
+            list("A"),
+            {
+                "A": pd.Series(
+                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
+                    index=list("aaabcdde"),
+                    dtype=object,
+                ),
+                "B": 1,
+                "C": [  # "C" is not exploded: each original cell repeats for every new row
+                    ["a", "b", "c"],
+                    ["a", "b", "c"],
+                    ["a", "b", "c"],
+                    "foo",
+                    [],
+                    ["d", "e"],
+                    ["d", "e"],
+                    np.nan,
+                ],
+            },
+            list("aaabcdde"),
+        ),
+    ],
+)
+def test_multi_columns(input_subset, expected_dict, expected_index):
+    # GH 39240: explode() called with a list of column labels
+    df = pd.DataFrame(
+        {
+            "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
+            "B": 1,
+            "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],  # list-like lengths match "A" row by row
+        },
+        index=list("abcde"),
+    )
+    result = df.explode(input_subset)
+    expected = pd.DataFrame(expected_dict, expected_index)  # second positional argument is the index
+    tm.assert_frame_equal(result, expected)