1339 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1339 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from datetime import (
 | 
						|
    date,
 | 
						|
    datetime,
 | 
						|
    timedelta,
 | 
						|
)
 | 
						|
from functools import partial
 | 
						|
from io import BytesIO
 | 
						|
import os
 | 
						|
import re
 | 
						|
 | 
						|
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
import pandas.util._test_decorators as td
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
from pandas import (
 | 
						|
    DataFrame,
 | 
						|
    Index,
 | 
						|
    MultiIndex,
 | 
						|
    get_option,
 | 
						|
    set_option,
 | 
						|
)
 | 
						|
import pandas._testing as tm
 | 
						|
 | 
						|
from pandas.io.excel import (
 | 
						|
    ExcelFile,
 | 
						|
    ExcelWriter,
 | 
						|
    _OpenpyxlWriter,
 | 
						|
    _XlsxWriter,
 | 
						|
    _XlwtWriter,
 | 
						|
    register_writer,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def path(ext):
    """
    Provide a temporary file path to the test that requests this fixture.

    The path is the one yielded by ``tm.ensure_clean(ext)``; the test body
    runs while that context manager is still active.
    """
    with tm.ensure_clean(ext) as tmp_file:
        yield tmp_file
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture
def set_engine(engine, ext):
    """
    Make ``engine`` the configured Excel writer for ``ext`` during one test.

    The ``io.excel.<ext>.writer`` option is switched to ``engine`` before the
    test body runs, so individual tests do not have to pass ``engine=...``
    themselves. The value that was in place beforehand is put back after the
    test has run.
    """
    suffix = ext.strip(".")
    writer_option = f"io.excel.{suffix}.writer"
    original_engine = get_option(writer_option)
    set_option(writer_option, engine)
    yield
    # Undo the global option change made above.
    set_option(writer_option, original_engine)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "ext",
 | 
						|
    [
 | 
						|
        pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]),
 | 
						|
        pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]),
 | 
						|
        pytest.param(".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]),
 | 
						|
        pytest.param(
 | 
						|
            ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")]
 | 
						|
        ),
 | 
						|
        pytest.param(".ods", marks=td.skip_if_no("odf")),
 | 
						|
    ],
 | 
						|
)
 | 
						|
class TestRoundTrip:
 | 
						|
    @pytest.mark.parametrize(
        "header,expected",
        [
            (None, DataFrame([np.nan] * 4)),
            (0, DataFrame({"Unnamed: 0": [np.nan] * 3})),
        ],
    )
    def test_read_one_empty_col_no_header(self, ext, header, expected):
        # xref gh-12292
        #
        # Write a sheet without header or index whose first column holds
        # only empty strings, then read just that column back.
        sheet = "no_header"
        rows = [["", n, 100 * n] for n in range(1, 5)]
        source = DataFrame(rows)

        with tm.ensure_clean(ext) as tmp_file:
            source.to_excel(tmp_file, sheet, index=False, header=False)
            observed = pd.read_excel(
                tmp_file, sheet_name=sheet, usecols=[0], header=header
            )

        tm.assert_frame_equal(observed, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
        "header,expected",
        [
            (None, DataFrame([0] + [np.nan] * 4)),
            (0, DataFrame([np.nan] * 4)),
        ],
    )
    def test_read_one_empty_col_with_header(self, ext, header, expected):
        # Same data as the no-header variant, but the sheet is written with
        # its header row before the first column is read back.
        sheet = "with_header"
        rows = [["", n, 100 * n] for n in range(1, 5)]
        source = DataFrame(rows)

        with tm.ensure_clean(ext) as tmp_file:
            source.to_excel(tmp_file, sheet, index=False, header=True)
            observed = pd.read_excel(
                tmp_file, sheet_name=sheet, usecols=[0], header=header
            )

        tm.assert_frame_equal(observed, expected)
 | 
						|
 | 
						|
    def test_set_column_names_in_parameter(self, ext):
        # GH 12870 : pass down column names associated with
        # keyword argument names
        new_names = ["A", "B"]
        records = [[1, "foo"], [2, "bar"], [3, "baz"]]
        written = DataFrame(records, columns=["a", "b"])
        # Both reads below should return the same data under the new labels.
        expected = DataFrame(records, columns=new_names)

        with tm.ensure_clean(ext) as tmp_file:
            with ExcelWriter(tmp_file) as writer:
                written.to_excel(writer, "Data_no_head", header=False, index=False)
                written.to_excel(writer, "Data_with_head", index=False)

            with ExcelFile(tmp_file) as reader:
                no_head = pd.read_excel(
                    reader, sheet_name="Data_no_head", header=None, names=new_names
                )
                with_head = pd.read_excel(
                    reader,
                    sheet_name="Data_with_head",
                    index_col=None,
                    names=new_names,
                )

            tm.assert_frame_equal(no_head, expected)
            tm.assert_frame_equal(with_head, expected)
 | 
						|
 | 
						|
    def test_creating_and_reading_multiple_sheets(self, ext):
        # see gh-9450
        #
        # Test reading multiple sheets, from a runtime
        # created Excel file with multiple sheets.
        sheet_names = ["AAA", "BBB", "CCC"]
        # One single-column frame per sheet; the column is named after the sheet.
        frames = {
            name: DataFrame([11, 22, 33], index=[1, 2, 3], columns=[name])
            for name in sheet_names
        }

        with tm.ensure_clean(ext) as tmp_file:
            with ExcelWriter(tmp_file) as writer:
                for name, sheet_df in frames.items():
                    sheet_df.to_excel(writer, name)

            read_back = pd.read_excel(tmp_file, sheet_name=sheet_names, index_col=0)

            for name in sheet_names:
                tm.assert_frame_equal(frames[name], read_back[name])
 | 
						|
 | 
						|
    def test_read_excel_multiindex_empty_level(self, ext):
        # see gh-12453
        #
        # Each pair is (frame to write, frame expected on reading back): the
        # column whose second-level label is "" is expected to come back with
        # an "Unnamed: ..." placeholder in that level.
        cases = [
            (
                DataFrame(
                    {
                        ("One", "x"): {0: 1},
                        ("Two", "X"): {0: 3},
                        ("Two", "Y"): {0: 7},
                        ("Zero", ""): {0: 0},
                    }
                ),
                DataFrame(
                    {
                        ("One", "x"): {0: 1},
                        ("Two", "X"): {0: 3},
                        ("Two", "Y"): {0: 7},
                        ("Zero", "Unnamed: 4_level_1"): {0: 0},
                    }
                ),
            ),
            (
                DataFrame(
                    {
                        ("Beg", ""): {0: 0},
                        ("Middle", "x"): {0: 1},
                        ("Tail", "X"): {0: 3},
                        ("Tail", "Y"): {0: 7},
                    }
                ),
                DataFrame(
                    {
                        ("Beg", "Unnamed: 1_level_1"): {0: 0},
                        ("Middle", "x"): {0: 1},
                        ("Tail", "X"): {0: 3},
                        ("Tail", "Y"): {0: 7},
                    }
                ),
            ),
        ]

        with tm.ensure_clean(ext) as tmp_file:
            for written, expected in cases:
                written.to_excel(tmp_file)
                observed = pd.read_excel(tmp_file, header=[0, 1], index_col=0)
                tm.assert_frame_equal(observed, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("c_idx_names", [True, False])
    @pytest.mark.parametrize("r_idx_names", [True, False])
    @pytest.mark.parametrize("c_idx_levels", [1, 3])
    @pytest.mark.parametrize("r_idx_levels", [1, 3])
    def test_excel_multindex_roundtrip(
        self, ext, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels, request
    ):
        # see gh-4679
        with tm.ensure_clean(ext) as tmp_file:
            named_flat_columns = c_idx_levels == 1 and c_idx_names
            if named_flat_columns and (r_idx_levels != 3 or r_idx_names):
                request.node.add_marker(
                    pytest.mark.xfail(
                        reason="Column index name cannot be serialized unless "
                        "it's a MultiIndex"
                    )
                )

            # Empty name case current read in as
            # unnamed levels, not Nones.
            check_names = r_idx_names or r_idx_levels <= 1

            df = tm.makeCustomDataframe(
                5, 5, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels
            )

            # Round-trip the frame as generated, then again after blanking
            # its first row, then once more after also blanking its last row.
            for blank_row in (None, 0, -1):
                if blank_row is not None:
                    df.iloc[blank_row, :] = np.nan
                df.to_excel(tmp_file)

                act = pd.read_excel(
                    tmp_file,
                    index_col=list(range(r_idx_levels)),
                    header=list(range(c_idx_levels)),
                )
                tm.assert_frame_equal(df, act, check_names=check_names)
 | 
						|
 | 
						|
    def test_read_excel_parse_dates(self, ext):
        # see gh-11544, gh-12051
        def parse_month_day_year(text):
            return datetime.strptime(text, "%m/%d/%Y")

        with_dates = DataFrame(
            {"col": [1, 2, 3], "date_strings": pd.date_range("2012-01-01", periods=3)}
        )
        # Same frame, but the date column rendered as "%m/%d/%Y" text.
        with_text = with_dates.copy()
        with_text["date_strings"] = with_text["date_strings"].dt.strftime("%m/%d/%Y")

        with tm.ensure_clean(ext) as tmp_file:
            with_text.to_excel(tmp_file)

            # No parse_dates: the column comes back as written.
            plain = pd.read_excel(tmp_file, index_col=0)
            tm.assert_frame_equal(with_text, plain)

            # parse_dates alone.
            parsed = pd.read_excel(tmp_file, parse_dates=["date_strings"], index_col=0)
            tm.assert_frame_equal(with_dates, parsed)

            # parse_dates together with an explicit parser.
            custom = pd.read_excel(
                tmp_file,
                parse_dates=["date_strings"],
                date_parser=parse_month_day_year,
                index_col=0,
            )
            tm.assert_frame_equal(with_dates, custom)
 | 
						|
 | 
						|
    def test_multiindex_interval_datetimes(self, ext):
        # GH 30986
        intervals = pd.interval_range(
            start=pd.Timestamp("2020-01-01"), periods=4, freq="6M"
        )
        written = DataFrame(
            range(4), index=MultiIndex.from_arrays([range(4), intervals])
        )

        with tm.ensure_clean(ext) as tmp_file:
            written.to_excel(tmp_file)
            observed = pd.read_excel(tmp_file, index_col=[0, 1])

        # The interval level is expected back as these strings.
        interval_labels = [
            "(2020-01-31, 2020-07-31]",
            "(2020-07-31, 2021-01-31]",
            "(2021-01-31, 2021-07-31]",
            "(2021-07-31, 2022-01-31]",
        ]
        expected = DataFrame(
            range(4),
            MultiIndex.from_arrays([range(4), interval_labels]),
        )
        tm.assert_frame_equal(observed, expected)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "engine,ext",
 | 
						|
    [
 | 
						|
        pytest.param(
 | 
						|
            "openpyxl",
 | 
						|
            ".xlsx",
 | 
						|
            marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")],
 | 
						|
        ),
 | 
						|
        pytest.param(
 | 
						|
            "openpyxl",
 | 
						|
            ".xlsm",
 | 
						|
            marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")],
 | 
						|
        ),
 | 
						|
        pytest.param(
 | 
						|
            "xlwt", ".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]
 | 
						|
        ),
 | 
						|
        pytest.param(
 | 
						|
            "xlsxwriter",
 | 
						|
            ".xlsx",
 | 
						|
            marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")],
 | 
						|
        ),
 | 
						|
        pytest.param("odf", ".ods", marks=td.skip_if_no("odf")),
 | 
						|
    ],
 | 
						|
)
 | 
						|
@pytest.mark.usefixtures("set_engine")
 | 
						|
class TestExcelWriter:
 | 
						|
    def test_excel_sheet_size(self, path):
        # GH 26080
        #
        # Writing a frame with too many rows, or too many columns, must be
        # rejected with a ValueError.
        too_many_rows = 2**20 + 1
        too_many_cols = 2**14 + 1
        # purposely using two arrays to prevent memory issues while testing
        tall = DataFrame(np.zeros(shape=(too_many_rows, 1)))
        wide = DataFrame(np.zeros(shape=(1, too_many_cols)))

        for oversized in (tall, wide):
            with pytest.raises(ValueError, match="sheet is too large"):
                oversized.to_excel(path)
 | 
						|
 | 
						|
    def test_excel_sheet_by_name_raise(self, path, engine):
        # Reading sheet index 0 must return the written frame, while asking
        # for a sheet *named* "0" must raise because no such sheet exists.
        gt = DataFrame(np.random.randn(10, 2))
        gt.to_excel(path)

        with ExcelFile(path) as xl:
            df = pd.read_excel(xl, sheet_name=0, index_col=0)

            # Do the by-name lookup while the reader is still open, so the
            # check does not depend on how a closed ExcelFile behaves.
            msg = "Worksheet named '0' not found"
            with pytest.raises(ValueError, match=msg):
                pd.read_excel(xl, "0")

        tm.assert_frame_equal(gt, df)
 | 
						|
 | 
						|
    def test_excel_writer_context_manager(self, frame, path):
        # Two sheets written through a single ExcelWriter context must both
        # be readable once the writer has been closed.
        reversed_cols = frame.copy()
        reversed_cols.columns = frame.columns[::-1]

        with ExcelWriter(path) as writer:
            frame.to_excel(writer, "Data1")
            reversed_cols.to_excel(writer, "Data2")

        with ExcelFile(path) as reader:
            first = pd.read_excel(reader, sheet_name="Data1", index_col=0)
            second = pd.read_excel(reader, sheet_name="Data2", index_col=0)

            tm.assert_frame_equal(first, frame)
            tm.assert_frame_equal(second, reversed_cols)
 | 
						|
 | 
						|
    def test_roundtrip(self, frame, path):
        # Round-trip ``frame`` (with some NaNs in column "A") through Excel
        # under several writer/reader option combinations.
        frame = frame.copy()
        # Blank the first five rows of "A" with one positional setitem; the
        # chained form ``frame["A"][:5] = ...`` assigns through an
        # intermediate Series and is not guaranteed to reach ``frame``.
        frame.iloc[:5, frame.columns.get_loc("A")] = np.nan

        # These writes only check that the argument combinations are
        # accepted; their output is overwritten without being read.
        frame.to_excel(path, "test1")
        frame.to_excel(path, "test1", columns=["A", "B"])
        frame.to_excel(path, "test1", header=False)
        frame.to_excel(path, "test1", index=False)

        # test roundtrip
        frame.to_excel(path, "test1")
        recons = pd.read_excel(path, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(frame, recons)

        # Written without the index, so restore it before comparing.
        frame.to_excel(path, "test1", index=False)
        recons = pd.read_excel(path, sheet_name="test1", index_col=None)
        recons.index = frame.index
        tm.assert_frame_equal(frame, recons)

        frame.to_excel(path, "test1", na_rep="NA")
        recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["NA"])
        tm.assert_frame_equal(frame, recons)

        # GH 3611
        frame.to_excel(path, "test1", na_rep="88")
        recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["88"])
        tm.assert_frame_equal(frame, recons)

        frame.to_excel(path, "test1", na_rep="88")
        recons = pd.read_excel(
            path, sheet_name="test1", index_col=0, na_values=[88, 88.0]
        )
        tm.assert_frame_equal(frame, recons)

        # GH 6573
        frame.to_excel(path, "Sheet1")
        recons = pd.read_excel(path, index_col=0)
        tm.assert_frame_equal(frame, recons)

        frame.to_excel(path, "0")
        recons = pd.read_excel(path, index_col=0)
        tm.assert_frame_equal(frame, recons)

        # GH 8825 Pandas Series should provide to_excel method
        s = frame["A"]
        s.to_excel(path)
        recons = pd.read_excel(path, index_col=0)
        tm.assert_frame_equal(s.to_frame(), recons)
 | 
						|
 | 
						|
    def test_mixed(self, frame, path):
        # The fixture frame plus an added string column must survive a
        # write/read cycle unchanged.
        with_strings = frame.copy()
        with_strings["foo"] = "bar"
        with_strings.to_excel(path, "test1")

        with ExcelFile(path) as reader:
            read_back = pd.read_excel(reader, sheet_name="test1", index_col=0)

        tm.assert_frame_equal(with_strings, read_back)
 | 
						|
 | 
						|
    def test_ts_frame(self, tsframe, path):
        # freq doesn't round-trip, so rebuild the index without one first.
        tsframe.index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)

        tsframe.to_excel(path, "test1")
        with ExcelFile(path) as reader:
            read_back = pd.read_excel(reader, sheet_name="test1", index_col=0)

        tm.assert_frame_equal(tsframe, read_back)
 | 
						|
 | 
						|
    def test_basics_with_nan(self, frame, path):
        # Writing a frame that contains NaNs must be accepted for each of
        # the argument combinations below; nothing is read back.
        frame = frame.copy()
        # Blank the first five rows of "A" with one positional setitem; the
        # chained form ``frame["A"][:5] = ...`` assigns through an
        # intermediate Series and is not guaranteed to reach ``frame``.
        frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
        frame.to_excel(path, "test1")
        frame.to_excel(path, "test1", columns=["A", "B"])
        frame.to_excel(path, "test1", header=False)
        frame.to_excel(path, "test1", index=False)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("np_type", [np.int8, np.int16, np.int32, np.int64])
    def test_int_types(self, np_type, path):
        # Test np.int values read come back as int
        # (rather than float which is Excel's format).
        written = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type)
        written.to_excel(path, "test1")
        as_int64 = written.astype(np.int64)

        # Read once through an ExcelFile handle and once straight from the path.
        with ExcelFile(path) as reader:
            via_reader = pd.read_excel(reader, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(as_int64, via_reader)

        via_path = pd.read_excel(path, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(as_int64, via_path)

        # Test with convert_float=False comes back as float.
        as_float = written.astype(float)
        as_float.columns = as_float.columns.astype(float)
        as_float.index = as_float.index.astype(float)
        with tm.assert_produces_warning(
            FutureWarning, match="convert_float is deprecated"
        ):
            unconverted = pd.read_excel(
                path, sheet_name="test1", convert_float=False, index_col=0
            )
        tm.assert_frame_equal(unconverted, as_float)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64])
    def test_float_types(self, np_type, path):
        # Test np.float values read come back as float.
        written = DataFrame(np.random.random_sample(10), dtype=np_type)
        written.to_excel(path, "test1")

        with ExcelFile(path) as reader:
            raw = pd.read_excel(reader, sheet_name="test1", index_col=0)
        # Cast back to the parametrized dtype before comparing.
        read_back = raw.astype(np_type)

        tm.assert_frame_equal(written, read_back)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("np_type", [np.bool8, np.bool_])
    def test_bool_types(self, np_type, path):
        # Boolean frames must round-trip once the result is cast back to
        # the parametrized boolean dtype.
        written = DataFrame([1, 0, True, False], dtype=np_type)
        written.to_excel(path, "test1")

        with ExcelFile(path) as reader:
            raw = pd.read_excel(reader, sheet_name="test1", index_col=0)
        read_back = raw.astype(np_type)

        tm.assert_frame_equal(written, read_back)
 | 
						|
 | 
						|
    def test_inf_roundtrip(self, path):
        # Positive and negative infinity must survive a write/read cycle.
        rows = [(1, np.inf), (2, 3), (5, -np.inf)]
        written = DataFrame(rows)
        written.to_excel(path, "test1")

        with ExcelFile(path) as reader:
            read_back = pd.read_excel(reader, sheet_name="test1", index_col=0)

        tm.assert_frame_equal(written, read_back)
 | 
						|
 | 
						|
    def test_sheets(self, frame, tsframe, path):
        # Two frames written to separate sheets of one workbook must both
        # read back, and the workbook must list the sheets in write order.

        # freq doesn't round-trip
        index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)
        tsframe.index = index

        frame = frame.copy()
        # Blank the first five rows of "A" with one positional setitem; the
        # chained form ``frame["A"][:5] = ...`` assigns through an
        # intermediate Series and is not guaranteed to reach ``frame``.
        frame.iloc[:5, frame.columns.get_loc("A")] = np.nan

        # These writes only check that the argument combinations are
        # accepted; their output is overwritten below.
        frame.to_excel(path, "test1")
        frame.to_excel(path, "test1", columns=["A", "B"])
        frame.to_excel(path, "test1", header=False)
        frame.to_excel(path, "test1", index=False)

        # Test writing to separate sheets
        with ExcelWriter(path) as writer:
            frame.to_excel(writer, "test1")
            tsframe.to_excel(writer, "test2")
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
            tm.assert_frame_equal(frame, recons)
            recons = pd.read_excel(reader, sheet_name="test2", index_col=0)
            tm.assert_frame_equal(tsframe, recons)

            # Inspect the sheet names while the reader is still open rather
            # than after the ``with`` block has closed it.
            assert len(reader.sheet_names) == 2
            assert reader.sheet_names[0] == "test1"
            assert reader.sheet_names[1] == "test2"
 | 
						|
 | 
						|
    def test_colaliases(self, frame, path):
        # Passing an Index as ``header`` must write those labels in place
        # of the frame's own column names.
        frame = frame.copy()
        # Blank the first five rows of "A" with one positional setitem; the
        # chained form ``frame["A"][:5] = ...`` assigns through an
        # intermediate Series and is not guaranteed to reach ``frame``.
        frame.iloc[:5, frame.columns.get_loc("A")] = np.nan

        # These writes only check that the argument combinations are
        # accepted; their output is overwritten below.
        frame.to_excel(path, "test1")
        frame.to_excel(path, "test1", columns=["A", "B"])
        frame.to_excel(path, "test1", header=False)
        frame.to_excel(path, "test1", index=False)

        # column aliases
        col_aliases = Index(["AA", "X", "Y", "Z"])
        frame.to_excel(path, "test1", header=col_aliases)
        with ExcelFile(path) as reader:
            rs = pd.read_excel(reader, sheet_name="test1", index_col=0)
        # Expected result: the same data under the alias labels.
        xp = frame.copy()
        xp.columns = col_aliases
        tm.assert_frame_equal(xp, rs)
 | 
						|
 | 
						|
    def test_roundtrip_indexlabels(self, merge_cells, frame, path):
        # Exercise ``index_label`` given as a one-item list, a longer list
        # and a plain string, then a read using two index columns, all with
        # the requested ``merge_cells`` setting.
        frame = frame.copy()
        # Blank the first five rows of "A" with one positional setitem; the
        # chained form ``frame["A"][:5] = ...`` assigns through an
        # intermediate Series and is not guaranteed to reach ``frame``.
        frame.iloc[:5, frame.columns.get_loc("A")] = np.nan

        # These writes only check that the argument combinations are
        # accepted; their output is overwritten below.
        frame.to_excel(path, "test1")
        frame.to_excel(path, "test1", columns=["A", "B"])
        frame.to_excel(path, "test1", header=False)
        frame.to_excel(path, "test1", index=False)

        # test index_label
        df = DataFrame(np.random.randn(10, 2)) >= 0
        df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells)
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
                np.int64
            )
        df.index.names = ["test"]
        assert df.index.names == recons.index.names

        # More labels than index levels: only the index name is compared.
        df = DataFrame(np.random.randn(10, 2)) >= 0
        df.to_excel(
            path,
            "test1",
            index_label=["test", "dummy", "dummy2"],
            merge_cells=merge_cells,
        )
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
                np.int64
            )
        df.index.names = ["test"]
        assert df.index.names == recons.index.names

        # Label given as a plain string; here the full frame is compared.
        df = DataFrame(np.random.randn(10, 2)) >= 0
        df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells)
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
                np.int64
            )
        df.index.names = ["test"]
        tm.assert_frame_equal(df, recons.astype(bool))

        frame.to_excel(
            path,
            "test1",
            columns=["A", "B", "C", "D"],
            index=False,
            merge_cells=merge_cells,
        )
        # take 'A' and 'B' as indexes (same row as cols 'C', 'D')
        df = frame.copy()
        df = df.set_index(["A", "B"])

        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
        tm.assert_frame_equal(df, recons)
 | 
						|
 | 
						|
    def test_excel_roundtrip_indexname(self, merge_cells, path):
        # The name of the index must be preserved by a write/read cycle.
        written = DataFrame(np.random.randn(10, 4))
        written.index.name = "foo"
        written.to_excel(path, merge_cells=merge_cells)

        with ExcelFile(path) as workbook:
            first_sheet = workbook.sheet_names[0]
            read_back = pd.read_excel(workbook, sheet_name=first_sheet, index_col=0)

        tm.assert_frame_equal(read_back, written)
        assert read_back.index.name == "foo"
 | 
						|
 | 
						|
    def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path):
        # datetime.date, not sure what to test here exactly

        # freq does not round-trip
        tsframe.index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)

        # Same data, but indexed by the ``.date()`` of each index entry.
        dated = tsframe.copy()
        dated.index = [stamp.date() for stamp in tsframe.index]
        dated.to_excel(path, "test1", merge_cells=merge_cells)

        with ExcelFile(path) as reader:
            read_back = pd.read_excel(reader, sheet_name="test1", index_col=0)

        tm.assert_frame_equal(tsframe, read_back)
 | 
						|
 | 
						|
    def test_excel_date_datetime_format(self, engine, ext, path):
        # see gh-4133
        #
        # Excel output format strings
        labels = {"index": ["DATE", "DATETIME"], "columns": ["X", "Y"]}
        datetime_row = [
            datetime(1998, 5, 26, 23, 33, 4),
            datetime(2014, 2, 28, 13, 5, 13),
        ]
        written = DataFrame(
            [[date(2014, 1, 31), date(1999, 9, 24)], datetime_row], **labels
        )
        expected = DataFrame(
            [[datetime(2014, 1, 31), datetime(1999, 9, 24)], datetime_row], **labels
        )

        with tm.ensure_clean(ext) as formatted_path:
            # Same frame written twice: once with the writer's defaults and
            # once with explicit date/datetime format strings.
            with ExcelWriter(path) as default_writer:
                written.to_excel(default_writer, "test1")

            with ExcelWriter(
                formatted_path,
                date_format="DD.MM.YYYY",
                datetime_format="DD.MM.YYYY HH-MM-SS",
            ) as formatted_writer:
                written.to_excel(formatted_writer, "test1")

            with ExcelFile(path) as default_reader:
                default_read = pd.read_excel(
                    default_reader, sheet_name="test1", index_col=0
                )

            with ExcelFile(formatted_path) as formatted_reader:
                formatted_read = pd.read_excel(
                    formatted_reader, sheet_name="test1", index_col=0
                )

        tm.assert_frame_equal(default_read, formatted_read)

        # Since the reader returns a datetime object for dates,
        # we need to use the datetime-only frame to check the result.
        tm.assert_frame_equal(formatted_read, expected)
 | 
						|
 | 
						|
    def test_to_excel_interval_no_labels(self, path):
        """
        Write an unlabelled ``pd.cut`` result and compare with its string form.

        See gh-19242.
        """
        base = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64)

        # Bin the single integer column into ten intervals.
        binned = pd.cut(base[0], 10)

        expected = base.copy()
        expected["new"] = binned.astype(str)

        written = base
        written["new"] = binned

        written.to_excel(path, sheet_name="test1")
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(expected, recons)
 | 
						|
 | 
						|
    def test_to_excel_interval_labels(self, path):
        """
        Write a labelled ``pd.cut`` result and compare with the plain labels.

        See gh-19242.
        """
        written = DataFrame(
            np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64
        )
        expected = written.copy()

        bin_labels = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
        intervals = pd.cut(written[0], 10, labels=bin_labels)

        written["new"] = intervals
        # The expectation holds the labels as an ordinary Series built from a list.
        expected["new"] = pd.Series(list(intervals))

        written.to_excel(path, sheet_name="test1")
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(expected, recons)
 | 
						|
 | 
						|
    def test_to_excel_timedelta(self, path):
        """
        Write timedeltas and expect them back as fractions of a day.

        See gh-19242, gh-9155.
        """

        def as_timedelta(seconds):
            return timedelta(seconds=seconds)

        def as_day_fraction(seconds):
            # 86400 is the number of seconds in one day.
            return timedelta(seconds=seconds).total_seconds() / 86400

        written = DataFrame(
            np.random.randint(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64
        )
        expected = written.copy()

        written["new"] = written["A"].apply(as_timedelta)
        expected["new"] = expected["A"].apply(as_day_fraction)

        written.to_excel(path, sheet_name="test1")
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(expected, recons)
 | 
						|
 | 
						|
    def test_to_excel_periodindex(self, tsframe, path):
        """Round-trip a monthly period-indexed frame through an Excel sheet."""
        sheet = "sht1"
        monthly = tsframe.resample("M", kind="period").mean()
        monthly.to_excel(path, sheet_name=sheet)

        with ExcelFile(path) as reader:
            read_back = pd.read_excel(reader, sheet_name=sheet, index_col=0)

        # The index is converted back to monthly periods before comparing.
        tm.assert_frame_equal(monthly, read_back.to_period("M"))
 | 
						|
 | 
						|
    def test_to_excel_multiindex(self, merge_cells, frame, path):
        """Round-trip a frame carrying a two-level row MultiIndex."""
        level_values = np.arange(2 * len(frame.index)).reshape(2, -1)
        frame.index = MultiIndex.from_arrays(level_values, names=["first", "second"])

        # These two writes only need to succeed; their output is overwritten.
        frame.to_excel(path, sheet_name="test1", header=False)
        frame.to_excel(path, sheet_name="test1", columns=["A", "B"])

        # round trip
        frame.to_excel(path, sheet_name="test1", merge_cells=merge_cells)
        with ExcelFile(path) as reader:
            read_back = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
        tm.assert_frame_equal(frame, read_back)
 | 
						|
 | 
						|
    # GH13511
 | 
						|
    def test_to_excel_multiindex_nan_label(self, merge_cells, path):
        """Round-trip a MultiIndex whose first level contains a missing label."""
        raw = {"A": [None, 2, 3], "B": [10, 20, 30], "C": np.random.sample(3)}
        indexed = DataFrame(raw).set_index(["A", "B"])

        indexed.to_excel(path, merge_cells=merge_cells)
        read_back = pd.read_excel(path, index_col=[0, 1])

        tm.assert_frame_equal(indexed, read_back)
 | 
						|
 | 
						|
    # Test for Issue 11328. If column indices are integers, make
 | 
						|
    # sure they are handled correctly for either setting of
 | 
						|
    # merge_cells
 | 
						|
    def test_to_excel_multiindex_cols(self, merge_cells, frame, path):
        """
        Round-trip a frame with integer MultiIndex columns (issue 11328).

        Without merged cells the header is read as a single row, so the
        expected column labels are the level values joined by ".".
        """
        level_values = np.arange(2 * len(frame.index)).reshape(2, -1)
        frame.index = MultiIndex.from_arrays(level_values, names=["first", "second"])
        frame.columns = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)])

        header = [0, 1] if merge_cells else 0

        # round trip
        frame.to_excel(path, sheet_name="test1", merge_cells=merge_cells)
        with ExcelFile(path) as reader:
            read_back = pd.read_excel(
                reader, sheet_name="test1", header=header, index_col=[0, 1]
            )

        if not merge_cells:
            formatted_levels = frame.columns.format(
                sparsify=False, adjoin=False, names=False
            )
            frame.columns = [
                ".".join(str(part) for part in parts)
                for parts in zip(*formatted_levels)
            ]
        tm.assert_frame_equal(frame, read_back)
 | 
						|
 | 
						|
    def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path):
        """Round-trip a MultiIndex whose first level is the frame's date index."""
        position = np.arange(len(tsframe.index))
        tsframe.index = MultiIndex.from_arrays([tsframe.index, position])
        tsframe.index.names = ["time", "foo"]

        tsframe.to_excel(path, sheet_name="test1", merge_cells=merge_cells)
        with ExcelFile(path) as reader:
            recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])

        tm.assert_frame_equal(tsframe, recons)
        # The level names must be read back as well.
        assert recons.index.names == ("time", "foo")
 | 
						|
 | 
						|
    def test_to_excel_multiindex_no_write_index(self, path):
        """
        Write a MultiIndexed frame with ``index=False`` and re-read it (GH 5616).

        The result must equal the same data with a default index.
        """
        # Reference frame without a MultiIndex.
        plain = DataFrame({"a": [10, 20], "b": [30, 40], "c": [50, 60]})

        # Same data, but indexed by a two-level MultiIndex.
        indexed = plain.copy()
        indexed.index = MultiIndex.from_tuples([(70, 80), (90, 100)])

        # The index is dropped on write ...
        indexed.to_excel(path, sheet_name="test1", index=False)

        # ... so reading the sheet back must give the reference frame.
        with ExcelFile(path) as reader:
            read_back = pd.read_excel(reader, sheet_name="test1")

        tm.assert_frame_equal(plain, read_back)
 | 
						|
 | 
						|
    def test_to_excel_float_format(self, path):
        """Floats written with ``float_format="%.2f"`` come back rounded."""
        labels = {"index": ["A", "B"], "columns": ["X", "Y", "Z"]}

        source = DataFrame(
            [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
            **labels,
        )
        source.to_excel(path, sheet_name="test1", float_format="%.2f")

        with ExcelFile(path) as reader:
            result = pd.read_excel(reader, sheet_name="test1", index_col=0)

        expected = DataFrame(
            [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
            **labels,
        )
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_to_excel_output_encoding(self, ext):
        """Round-trip non-ASCII values and labels written with ``encoding="utf8"``."""
        # Avoid mixed inferred_type.
        unicode_frame = DataFrame(
            [["\u0192", "\u0193", "\u0194"], ["\u0195", "\u0196", "\u0197"]],
            index=["A\u0192", "B"],
            columns=["X\u0193", "Y", "Z"],
        )
        sheet = "TestSheet"

        with tm.ensure_clean("__tmp_to_excel_float_format__." + ext) as filename:
            unicode_frame.to_excel(filename, sheet_name=sheet, encoding="utf8")
            result = pd.read_excel(filename, sheet_name=sheet, index_col=0)
            tm.assert_frame_equal(result, unicode_frame)
 | 
						|
 | 
						|
    def test_to_excel_unicode_filename(self, ext, path):
        """
        Round-trip a frame through a file whose name contains non-ASCII text.

        The test is skipped when creating a file with that name raises
        ``UnicodeEncodeError``.
        """
        with tm.ensure_clean("\u0192u." + ext) as filename:
            # Probe whether the platform accepts the unicode file name. The
            # ``with`` block closes the handle only when open() succeeded, so a
            # failing open() results in a clean skip. A ``finally: f.close()``
            # would instead raise UnboundLocalError (``f`` was never bound) and
            # mask the skip.
            try:
                with open(filename, "wb"):
                    pass
            except UnicodeEncodeError:
                pytest.skip("No unicode file names on this system")

            df = DataFrame(
                [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
                index=["A", "B"],
                columns=["X", "Y", "Z"],
            )
            df.to_excel(filename, "test1", float_format="%.2f")

            with ExcelFile(filename) as reader:
                result = pd.read_excel(reader, sheet_name="test1", index_col=0)

        # The float format rounds every value to two decimals on write.
        expected = DataFrame(
            [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
            index=["A", "B"],
            columns=["X", "Y", "Z"],
        )
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("use_headers", [True, False])
    @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3])
    @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3])
    def test_excel_010_hemstring(
        self, merge_cells, c_idx_nlevels, r_idx_nlevels, use_headers, path
    ):
        """
        Round-trip frames with varying numbers of row/column index levels.

        Frames with MultiIndex columns must be rejected when written without
        the index. Every other combination has to come back with the expected
        shape and with no cell that is the ``np.nan`` object.
        """

        def write_and_read(obj, header=True, parser_hdr=0, index=True):
            # Write ``obj`` to ``path`` and parse the first sheet back.
            obj.to_excel(path, header=header, merge_cells=merge_cells, index=index)
            with ExcelFile(path) as xf:
                first_sheet = xf.sheet_names[0]
                return pd.read_excel(xf, sheet_name=first_sheet, header=parser_hdr)

        # Basic test.
        parser_header = 0 if use_headers else None
        res = write_and_read(DataFrame([0]), use_headers, parser_header)

        assert res.shape == (1, 2)
        assert res.iloc[0, 0] is not np.nan

        # More complex tests with multi-index.
        nrows, ncols = 5, 3

        # ensure limited functionality in 0.10
        # override of gh-2370 until sorted out in 0.11
        df = tm.makeCustomDataframe(
            nrows, ncols, r_idx_nlevels=r_idx_nlevels, c_idx_nlevels=c_idx_nlevels
        )

        # This branch will be removed once multi-column Excel writing
        # is implemented. For now fixing gh-9794.
        if c_idx_nlevels > 1:
            msg = (
                "Writing to Excel with MultiIndex columns and no index "
                "\\('index'=False\\) is not yet implemented."
            )
            with pytest.raises(NotImplementedError, match=msg):
                write_and_read(df, use_headers, index=False)
            return

        res = write_and_read(df, use_headers)

        # Without headers the first row is taken as columns.
        expected_rows = nrows if use_headers else nrows - 1
        assert res.shape == (expected_rows, ncols + r_idx_nlevels)

        # No NaNs.
        rows_read, cols_read = len(res.index), len(res.columns)
        for r in range(rows_read):
            for c in range(cols_read):
                assert res.iloc[r, c] is not np.nan
 | 
						|
 | 
						|
    def test_duplicated_columns(self, path):
        """Duplicate column labels get ``.1`` suffixes when read back."""
        # see gh-5235
        rows = [[1, 2, 3], [1, 2, 3], [1, 2, 3]]
        DataFrame(rows, columns=["A", "B", "B"]).to_excel(path, sheet_name="test1")
        expected = DataFrame(rows, columns=["A", "B", "B.1"])

        # By default, we mangle.
        result = pd.read_excel(path, sheet_name="test1", index_col=0)
        tm.assert_frame_equal(result, expected)

        # Explicitly, we pass in the parameter.
        result = pd.read_excel(
            path, sheet_name="test1", index_col=0, mangle_dupe_cols=True
        )
        tm.assert_frame_equal(result, expected)

        # see gh-11007, gh-10970
        rows = [[1, 2, 3, 4], [5, 6, 7, 8]]
        df = DataFrame(rows, columns=["A", "B", "A", "B"])
        df.to_excel(path, sheet_name="test1")

        result = pd.read_excel(path, sheet_name="test1", index_col=0)
        expected = DataFrame(rows, columns=["A", "B", "A.1", "B.1"])
        tm.assert_frame_equal(result, expected)

        # see gh-10982
        df.to_excel(path, sheet_name="test1", index=False, header=False)
        result = pd.read_excel(path, sheet_name="test1", header=None)
        tm.assert_frame_equal(result, DataFrame(rows))

        msg = "Setting mangle_dupe_cols=False is not supported yet"
        with pytest.raises(ValueError, match=msg):
            pd.read_excel(path, sheet_name="test1", header=None, mangle_dupe_cols=False)
 | 
						|
 | 
						|
    def test_swapped_columns(self, path):
        """Writing columns in a different order keeps each column's data (#5427)."""
        write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]})
        write_frame.to_excel(path, sheet_name="test1", columns=["B", "A"])

        read_frame = pd.read_excel(path, sheet_name="test1", header=0)

        for label in ("A", "B"):
            tm.assert_series_equal(write_frame[label], read_frame[label])
 | 
						|
 | 
						|
    def test_invalid_columns(self, path):
        """Requesting columns the frame lacks raises ``KeyError`` on write."""
        # see gh-10982
        write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]})

        # Each requested column list is paired with the message it must trigger.
        cases = [
            (["B", "C"], "Not all names specified"),
            (["C", "D"], "'passes columns are not ALL present dataframe'"),
        ]
        for requested, msg in cases:
            with pytest.raises(KeyError, match=msg):
                write_frame.to_excel(path, sheet_name="test1", columns=requested)
 | 
						|
 | 
						|
    @pytest.mark.parametrize(
        "to_excel_index,read_excel_index_col",
        [
            (True, 0),  # Include index in write to file
            (False, None),  # Dont include index in write to file
        ],
    )
    def test_write_subset_columns(self, path, to_excel_index, read_excel_index_col):
        """Only the requested subset of columns is written (GH 31677)."""
        sheet = "col_subset_bug"
        subset = ["A", "B"]
        write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2], "C": [3, 3, 3]})

        write_frame.to_excel(
            path, sheet_name=sheet, columns=subset, index=to_excel_index
        )
        read_frame = pd.read_excel(
            path, sheet_name=sheet, index_col=read_excel_index_col
        )

        tm.assert_frame_equal(write_frame[subset], read_frame)
 | 
						|
 | 
						|
    def test_comment_arg(self, path):
        """
        Compare ``comment="#"`` parsing with manually blanked cells.

        See gh-18735.
        """
        sheet = "test_c"

        # Create file to read in.
        source = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
        source.to_excel(path, sheet_name=sheet)

        # Read file without comment arg, then blank these cells by hand.
        result1 = pd.read_excel(path, sheet_name=sheet, index_col=0)
        for row, col in [(1, 0), (1, 1), (2, 1)]:
            result1.iloc[row, col] = None

        result2 = pd.read_excel(path, sheet_name=sheet, comment="#", index_col=0)
        tm.assert_frame_equal(result1, result2)
 | 
						|
 | 
						|
    def test_comment_default(self, path):
        """Omitting ``comment`` reads the same as ``comment=None`` (#18735)."""
        sheet = "test_c"

        # Create file to read in
        source = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
        source.to_excel(path, sheet_name=sheet)

        # Read file with default and explicit comment=None
        with_default = pd.read_excel(path, sheet_name=sheet)
        with_none = pd.read_excel(path, sheet_name=sheet, comment=None)
        tm.assert_frame_equal(with_default, with_none)
 | 
						|
 | 
						|
    def test_comment_used(self, path):
        """
        Check ``comment="#"`` against a manually produced expected frame.

        See gh-18735.
        """
        sheet = "test_c"

        # Create file to read in.
        source = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
        source.to_excel(path, sheet_name=sheet)

        # Test read_frame_comment against manually produced expected output.
        result = pd.read_excel(path, sheet_name=sheet, comment="#", index_col=0)
        expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]})
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_comment_empty_line(self, path):
        """Commented lines at the end of the file are ignored (#18735)."""
        DataFrame({"a": ["1", "#2"], "b": ["2", "3"]}).to_excel(path, index=False)

        # Test that all-comment lines at EoF are ignored
        result = pd.read_excel(path, comment="#")
        expected = DataFrame({"a": [1], "b": [2]})
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_datetimes(self, path):
        """
        Test writing and reading datetimes. For issue #9139. (xref #9185)

        The comparison is skipped for ``xlsx``/``xlsm`` paths (GH #38644).
        """
        # (hour, minute, second) of each timestamp on 2013-01-13.
        clock_times = [
            (1, 2, 3),
            (2, 45, 56),
            (4, 29, 49),
            (6, 13, 42),
            (7, 57, 35),
            (9, 41, 28),
            (11, 25, 21),
            (13, 9, 14),
            (14, 53, 7),
            (16, 37, 0),
            (18, 20, 52),
        ]
        datetimes = [datetime(2013, 1, 13, *hms) for hms in clock_times]

        write_frame = DataFrame({"A": datetimes})
        write_frame.to_excel(path, sheet_name="Sheet1")
        if path.endswith(("xlsx", "xlsm")):
            pytest.skip(
                "Defaults to openpyxl and fails with floating point error on "
                "datetimes; may be fixed on newer versions of openpyxl - GH #38644"
            )
        read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0)

        tm.assert_series_equal(write_frame["A"], read_frame["A"])
 | 
						|
 | 
						|
    def test_bytes_io(self, engine):
        """Write to and read from an in-memory ``BytesIO`` buffer (gh-7074)."""
        original = DataFrame(np.random.randn(10, 2))

        with BytesIO() as buffer:
            # Pass engine explicitly, as there is no file path to infer from.
            with ExcelWriter(buffer, engine=engine) as writer:
                original.to_excel(writer)

            # Rewind so that the reader starts at the beginning of the buffer.
            buffer.seek(0)
            reread_df = pd.read_excel(buffer, index_col=0)
            tm.assert_frame_equal(original, reread_df)
 | 
						|
 | 
						|
    def test_write_lists_dict(self, path):
        """List and dict cells are expected back as their ``str`` form (gh-8188)."""
        columns = {
            "mixed": ["a", ["b", "c"], {"d": "e", "f": 2}],
            "numeric": [1, 2, 3.0],
            "str": ["apple", "banana", "cherry"],
        }
        df = DataFrame(columns)
        df.to_excel(path, sheet_name="Sheet1")
        read = pd.read_excel(path, sheet_name="Sheet1", header=0, index_col=0)

        expected = df.copy()
        # Containers are compared by their string representation ...
        expected["mixed"] = expected["mixed"].apply(str)
        # ... and the numeric column is expected as int64.
        expected["numeric"] = expected["numeric"].astype("int64")

        tm.assert_frame_equal(read, expected)
 | 
						|
 | 
						|
    def test_render_as_column_name(self, path):
        """A column named ``render`` round-trips unchanged (gh-34331)."""
        frame = DataFrame({"render": [1, 2], "data": [3, 4]})
        frame.to_excel(path, sheet_name="Sheet1")

        read = pd.read_excel(path, sheet_name="Sheet1", index_col=0)
        tm.assert_frame_equal(read, frame)
 | 
						|
 | 
						|
    def test_true_and_false_value_options(self, path):
        """``true_values``/``false_values`` turn matching strings into booleans."""
        # see gh-13347
        written = DataFrame([["foo", "bar"]], columns=["col1", "col2"])
        expected = written.replace({"foo": True, "bar": False})

        written.to_excel(path)
        read_options = {"true_values": ["foo"], "false_values": ["bar"]}
        read_frame = pd.read_excel(path, index_col=0, **read_options)

        tm.assert_frame_equal(read_frame, expected)
 | 
						|
 | 
						|
    def test_freeze_panes(self, path):
        """Passing ``freeze_panes`` on write leaves the data unchanged (gh-15160)."""
        expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
        expected.to_excel(path, sheet_name="Sheet1", freeze_panes=(1, 1))

        read_back = pd.read_excel(path, index_col=0)
        tm.assert_frame_equal(read_back, expected)
 | 
						|
 | 
						|
    def test_path_path_lib(self, engine, ext):
        """Round-trip a frame using ``tm.round_trip_pathlib``."""
        expected = tm.makeDataFrame()

        write = partial(expected.to_excel, engine=engine)
        read = partial(pd.read_excel, index_col=0)

        result = tm.round_trip_pathlib(write, read, path=f"foo{ext}")
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_path_local_path(self, engine, ext):
        """Round-trip a frame using ``tm.round_trip_localpath``."""
        expected = tm.makeDataFrame()

        write = partial(expected.to_excel, engine=engine)
        read = partial(pd.read_excel, index_col=0)

        result = tm.round_trip_localpath(write, read, path=f"foo{ext}")
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_merged_cell_custom_objects(self, merge_cells, path):
        """
        Round-trip ``Period`` objects used as MultiIndex column labels.

        See GH-27006.
        """
        year = pd.Period("2018")
        mi = MultiIndex.from_tuples(
            [(year, pd.Period("2018Q1")), (year, pd.Period("2018Q2"))]
        )
        expected = DataFrame(np.ones((2, 2)), columns=mi)
        expected.to_excel(path)

        with tm.assert_produces_warning(
            FutureWarning, match="convert_float is deprecated"
        ):
            result = pd.read_excel(
                path, header=[0, 1], index_col=0, convert_float=False
            )

        # need to convert PeriodIndexes to standard Indexes for assert equal
        stringified_levels = [[str(label) for label in level] for level in mi.levels]
        expected.columns = expected.columns.set_levels(
            stringified_levels, level=[0, 1]
        )
        expected.index = expected.index.astype(np.float64)
        tm.assert_frame_equal(expected, result)
 | 
						|
 | 
						|
    @pytest.mark.parametrize("dtype", [None, object])
    def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path):
        """Writing timezone-aware datetimes raises ``ValueError``."""
        # GH 27008, GH 7056
        stamp = pd.Timestamp("2019", tz=tz_aware_fixture)

        # The pandas Timestamp and its stdlib datetime form must both be rejected.
        for value in (stamp, stamp.to_pydatetime()):
            frame = DataFrame([value], dtype=dtype)
            with pytest.raises(ValueError, match="Excel does not support"):
                frame.to_excel(path)
 | 
						|
 | 
						|
    def test_excel_duplicate_columns_with_names(self, path):
        """Writing a column twice via ``columns=`` duplicates its data (GH#39695)."""
        source = DataFrame({"A": [0, 1], "B": [10, 11]})
        source.to_excel(path, columns=["A", "B", "A"], index=False)

        result = pd.read_excel(path)

        # The repeated "A" column is read back under the label "A.1".
        expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"])
        tm.assert_frame_equal(result, expected)
 | 
						|
 | 
						|
    def test_if_sheet_exists_raises(self, ext):
        """``if_sheet_exists`` without append mode raises ``ValueError``."""
        # GH 40230
        # The message contains parentheses, so escape it before matching.
        pattern = re.escape("if_sheet_exists is only valid in append mode (mode='a')")

        with tm.ensure_clean(ext) as target:
            with pytest.raises(ValueError, match=pattern):
                ExcelWriter(target, if_sheet_exists="replace")
 | 
						|
 | 
						|
 | 
						|
class TestExcelWriterEngineTests:
    """Checks for how ``ExcelWriter`` selects and validates its engine."""

    @pytest.mark.parametrize(
        "klass,ext",
        [
            pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")),
            pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")),
            pytest.param(_XlwtWriter, ".xls", marks=td.skip_if_no("xlwt")),
        ],
    )
    def test_ExcelWriter_dispatch(self, klass, ext):
        """``ExcelWriter(path)`` returns the writer class expected for ``ext``."""
        with tm.ensure_clean(ext) as path, ExcelWriter(path) as writer:
            # xlsxwriter has preference over openpyxl if both installed
            prefers_xlsxwriter = ext == ".xlsx" and td.safe_import("xlsxwriter")
            expected_cls = _XlsxWriter if prefers_xlsxwriter else klass
            assert isinstance(writer, expected_cls)

    def test_ExcelWriter_dispatch_raises(self):
        """A path without a usable extension raises ``ValueError``."""
        with pytest.raises(ValueError, match="No engine"):
            ExcelWriter("nothing")

    def test_register_writer(self):
        """A writer class registered via ``register_writer`` is dispatched to."""
        # some awkward mocking to test out dispatch and such actually works
        save_calls = []
        write_cells_calls = []

        class DummyClass(ExcelWriter):
            called_save = False
            called_write_cells = False
            supported_extensions = ["xlsx", "xls"]
            engine = "dummy"

            def save(self):
                save_calls.append(True)

            def write_cells(self, *args, **kwargs):
                write_cells_calls.append(True)

        def check_called(action):
            # Run ``action`` and require that both hooks fired; then reset.
            action()
            assert save_calls
            assert write_cells_calls
            save_calls.clear()
            write_cells_calls.clear()

        with pd.option_context("io.excel.xlsx.writer", "dummy"):
            with tm.ensure_clean("something.xlsx") as filepath:
                register_writer(DummyClass)
                with ExcelWriter(filepath) as writer:
                    assert isinstance(writer, DummyClass)
                df = tm.makeCustomDataframe(1, 1)
                check_called(lambda: df.to_excel(filepath))

            # For ".xls" the dummy engine is requested explicitly.
            with tm.ensure_clean("something.xls") as filepath:
                check_called(lambda: df.to_excel(filepath, engine="dummy"))

    @pytest.mark.parametrize(
        "ext",
        [
            pytest.param(".xlsx", marks=td.skip_if_no("xlsxwriter")),
            pytest.param(".xlsx", marks=td.skip_if_no("openpyxl")),
            pytest.param(".ods", marks=td.skip_if_no("odf")),
        ],
    )
    def test_engine_kwargs_and_kwargs_raises(self, ext):
        """Passing ``engine_kwargs`` together with extra kwargs raises."""
        # GH 40430
        pattern = re.escape("Cannot use both engine_kwargs and **kwargs")
        with pytest.raises(ValueError, match=pattern):
            with ExcelWriter("", engine_kwargs={"a": 1}, b=2):
                pass
 | 
						|
 | 
						|
 | 
						|
@td.skip_if_no("xlrd")
@td.skip_if_no("openpyxl")
class TestFSPath:
    """``os.fspath`` support of the Excel reader and writer objects."""

    def test_excelfile_fspath(self):
        """``os.fspath`` of an ``ExcelFile`` equals the path it was opened with."""
        with tm.ensure_clean("foo.xlsx") as path:
            DataFrame({"A": [1, 2]}).to_excel(path)
            with ExcelFile(path) as xl:
                result = os.fspath(xl)
            assert result == path

    def test_excelwriter_fspath(self):
        """``os.fspath`` of an ``ExcelWriter`` equals its target path as ``str``."""
        with tm.ensure_clean("foo.xlsx") as path, ExcelWriter(path) as writer:
            assert os.fspath(writer) == str(path)
 |