Tooling for pulse-transit
7 dist/client/pandas/tests/frame/methods/__init__.py vendored Normal file
@@ -0,0 +1,7 @@
"""
Test files dedicated to individual (stand-alone) DataFrame methods

Ideally these files/tests should correspond 1-to-1 with tests.series.methods

These may also present opportunities for sharing/de-duplicating test code.
"""
BIN vendored Normal file (binary files not shown) under dist/client/pandas/tests/frame/methods/__pycache__/:
__init__.cpython-310.pyc
test_add_prefix_suffix.cpython-310.pyc
test_align.cpython-310.pyc
test_append.cpython-310.pyc
test_asfreq.cpython-310.pyc
test_asof.cpython-310.pyc
test_assign.cpython-310.pyc
test_astype.cpython-310.pyc
test_at_time.cpython-310.pyc
test_between_time.cpython-310.pyc
test_clip.cpython-310.pyc
test_combine.cpython-310.pyc
test_combine_first.cpython-310.pyc
test_compare.cpython-310.pyc
test_convert.cpython-310.pyc
test_convert_dtypes.cpython-310.pyc
test_copy.cpython-310.pyc
test_count.cpython-310.pyc
test_count_with_level_deprecated.cpython-310.pyc
test_cov_corr.cpython-310.pyc
test_describe.cpython-310.pyc
test_diff.cpython-310.pyc
test_dot.cpython-310.pyc
test_drop.cpython-310.pyc
test_drop_duplicates.cpython-310.pyc
test_droplevel.cpython-310.pyc
test_dropna.cpython-310.pyc
test_dtypes.cpython-310.pyc
test_duplicated.cpython-310.pyc
test_equals.cpython-310.pyc
test_explode.cpython-310.pyc
test_fillna.cpython-310.pyc
test_filter.cpython-310.pyc
test_first_and_last.cpython-310.pyc
test_first_valid_index.cpython-310.pyc
test_get_numeric_data.cpython-310.pyc
test_head_tail.cpython-310.pyc
test_infer_objects.cpython-310.pyc
test_interpolate.cpython-310.pyc
test_is_homogeneous_dtype.cpython-310.pyc
test_isin.cpython-310.pyc
test_join.cpython-310.pyc
test_matmul.cpython-310.pyc
test_nlargest.cpython-310.pyc
test_pct_change.cpython-310.pyc
test_pipe.cpython-310.pyc
test_pop.cpython-310.pyc
test_quantile.cpython-310.pyc
test_rank.cpython-310.pyc
test_reindex.cpython-310.pyc
test_reindex_like.cpython-310.pyc
test_rename.cpython-310.pyc
test_rename_axis.cpython-310.pyc
test_reorder_levels.cpython-310.pyc
test_replace.cpython-310.pyc
test_reset_index.cpython-310.pyc
test_round.cpython-310.pyc
test_sample.cpython-310.pyc
test_select_dtypes.cpython-310.pyc
test_set_axis.cpython-310.pyc
test_set_index.cpython-310.pyc
test_shift.cpython-310.pyc
test_sort_index.cpython-310.pyc
test_sort_values.cpython-310.pyc
test_swapaxes.cpython-310.pyc
test_swaplevel.cpython-310.pyc
test_to_csv.cpython-310.pyc
test_to_dict.cpython-310.pyc
test_to_dict_of_blocks.cpython-310.pyc
test_to_numpy.cpython-310.pyc
test_to_period.cpython-310.pyc
test_to_records.cpython-310.pyc
test_to_timestamp.cpython-310.pyc
test_transpose.cpython-310.pyc
test_truncate.cpython-310.pyc
test_tz_convert.cpython-310.pyc
test_tz_localize.cpython-310.pyc
test_update.cpython-310.pyc
test_value_counts.cpython-310.pyc
test_values.cpython-310.pyc
20 dist/client/pandas/tests/frame/methods/test_add_prefix_suffix.py vendored Normal file
@@ -0,0 +1,20 @@
from pandas import Index
import pandas._testing as tm


def test_add_prefix_suffix(float_frame):
    with_prefix = float_frame.add_prefix("foo#")
    expected = Index([f"foo#{c}" for c in float_frame.columns])
    tm.assert_index_equal(with_prefix.columns, expected)

    with_suffix = float_frame.add_suffix("#foo")
    expected = Index([f"{c}#foo" for c in float_frame.columns])
    tm.assert_index_equal(with_suffix.columns, expected)

    with_pct_prefix = float_frame.add_prefix("%")
    expected = Index([f"%{c}" for c in float_frame.columns])
    tm.assert_index_equal(with_pct_prefix.columns, expected)

    with_pct_suffix = float_frame.add_suffix("%")
    expected = Index([f"{c}%" for c in float_frame.columns])
    tm.assert_index_equal(with_pct_suffix.columns, expected)
304 dist/client/pandas/tests/frame/methods/test_align.py vendored Normal file
@@ -0,0 +1,304 @@
import numpy as np
import pytest
import pytz

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    date_range,
)
import pandas._testing as tm


class TestDataFrameAlign:
    def test_frame_align_aware(self):
        idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern")
        idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern")
        df1 = DataFrame(np.random.randn(len(idx1), 3), idx1)
        df2 = DataFrame(np.random.randn(len(idx2), 3), idx2)
        new1, new2 = df1.align(df2)
        assert df1.index.tz == new1.index.tz
        assert df2.index.tz == new2.index.tz

        # different timezones convert to UTC

        # frame with frame
        df1_central = df1.tz_convert("US/Central")
        new1, new2 = df1.align(df1_central)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

        # frame with Series
        new1, new2 = df1.align(df1_central[0], axis=0)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

        df1[0].align(df1_central, axis=0)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

    def test_align_float(self, float_frame):
        af, bf = float_frame.align(float_frame)
        assert af._mgr is not float_frame._mgr

        af, bf = float_frame.align(float_frame, copy=False)
        assert af._mgr is float_frame._mgr

        # axis = 0
        other = float_frame.iloc[:-5, :3]
        af, bf = float_frame.align(other, axis=0, fill_value=-1)

        tm.assert_index_equal(bf.columns, other.columns)

        # test fill value
        join_idx = float_frame.index.join(other.index)
        diff_a = float_frame.index.difference(join_idx)
        diff_a_vals = af.reindex(diff_a).values
        assert (diff_a_vals == -1).all()

        af, bf = float_frame.align(other, join="right", axis=0)
        tm.assert_index_equal(bf.columns, other.columns)
        tm.assert_index_equal(bf.index, other.index)
        tm.assert_index_equal(af.index, other.index)

        # axis = 1
        other = float_frame.iloc[:-5, :3].copy()
        af, bf = float_frame.align(other, axis=1)
        tm.assert_index_equal(bf.columns, float_frame.columns)
        tm.assert_index_equal(bf.index, other.index)

        # test fill value
        join_idx = float_frame.index.join(other.index)
        diff_a = float_frame.index.difference(join_idx)
        diff_a_vals = af.reindex(diff_a).values

        assert (diff_a_vals == -1).all()

        af, bf = float_frame.align(other, join="inner", axis=1)
        tm.assert_index_equal(bf.columns, other.columns)

        af, bf = float_frame.align(other, join="inner", axis=1, method="pad")
        tm.assert_index_equal(bf.columns, other.columns)

        af, bf = float_frame.align(
            other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None
        )
        tm.assert_index_equal(bf.index, Index([]))

        af, bf = float_frame.align(
            other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0
        )
        tm.assert_index_equal(bf.index, Index([]))

        # Try to align DataFrame to Series along bad axis
        msg = "No axis named 2 for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            float_frame.align(af.iloc[0, :3], join="inner", axis=2)

        # align dataframe to series with broadcast or not
        idx = float_frame.index
        s = Series(range(len(idx)), index=idx)

        left, right = float_frame.align(s, axis=0)
        tm.assert_index_equal(left.index, float_frame.index)
        tm.assert_index_equal(right.index, float_frame.index)
        assert isinstance(right, Series)

        left, right = float_frame.align(s, broadcast_axis=1)
        tm.assert_index_equal(left.index, float_frame.index)
        expected = {c: s for c in float_frame.columns}
        expected = DataFrame(
            expected, index=float_frame.index, columns=float_frame.columns
        )
        tm.assert_frame_equal(right, expected)

        # see gh-9558
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        result = df[df["a"] == 2]
        expected = DataFrame([[2, 5]], index=[1], columns=["a", "b"])
        tm.assert_frame_equal(result, expected)

        result = df.where(df["a"] == 2, 0)
        expected = DataFrame({"a": [0, 2, 0], "b": [0, 5, 0]})
        tm.assert_frame_equal(result, expected)

    def test_align_int(self, int_frame):
        # test other non-float types
        other = DataFrame(index=range(5), columns=["A", "B", "C"])

        af, bf = int_frame.align(other, join="inner", axis=1, method="pad")
        tm.assert_index_equal(bf.columns, other.columns)

    def test_align_mixed_type(self, float_string_frame):

        af, bf = float_string_frame.align(
            float_string_frame, join="inner", axis=1, method="pad"
        )
        tm.assert_index_equal(bf.columns, float_string_frame.columns)

    def test_align_mixed_float(self, mixed_float_frame):
        # mixed floats/ints
        other = DataFrame(index=range(5), columns=["A", "B", "C"])

        af, bf = mixed_float_frame.align(
            other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0
        )
        tm.assert_index_equal(bf.index, Index([]))

    def test_align_mixed_int(self, mixed_int_frame):
        other = DataFrame(index=range(5), columns=["A", "B", "C"])

        af, bf = mixed_int_frame.align(
            other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0
        )
        tm.assert_index_equal(bf.index, Index([]))

    @pytest.mark.parametrize(
        "l_ordered,r_ordered,expected",
        [
            [True, True, pd.CategoricalIndex],
            [True, False, Index],
            [False, True, Index],
            [False, False, pd.CategoricalIndex],
        ],
    )
    def test_align_categorical(self, l_ordered, r_ordered, expected):
        # GH-28397
        df_1 = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": Series(list("aabbca")).astype(
                    pd.CategoricalDtype(list("cab"), ordered=l_ordered)
                ),
            }
        ).set_index("B")
        df_2 = DataFrame(
            {
                "A": np.arange(5, dtype="int64"),
                "B": Series(list("babca")).astype(
                    pd.CategoricalDtype(list("cab"), ordered=r_ordered)
                ),
            }
        ).set_index("B")

        aligned_1, aligned_2 = df_1.align(df_2)
        assert isinstance(aligned_1.index, expected)
        assert isinstance(aligned_2.index, expected)
        tm.assert_index_equal(aligned_1.index, aligned_2.index)

    def test_align_multiindex(self):
        # GH#10665
        # same test cases as test_align_multiindex in test_series.py

        midx = pd.MultiIndex.from_product(
            [range(2), range(3), range(2)], names=("a", "b", "c")
        )
        idx = Index(range(2), name="b")
        df1 = DataFrame(np.arange(12, dtype="int64"), index=midx)
        df2 = DataFrame(np.arange(2, dtype="int64"), index=idx)

        # these must be the same results (but flipped)
        res1l, res1r = df1.align(df2, join="left")
        res2l, res2r = df2.align(df1, join="right")

        expl = df1
        tm.assert_frame_equal(expl, res1l)
        tm.assert_frame_equal(expl, res2r)
        expr = DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
        tm.assert_frame_equal(expr, res1r)
        tm.assert_frame_equal(expr, res2l)

        res1l, res1r = df1.align(df2, join="right")
        res2l, res2r = df2.align(df1, join="left")

        exp_idx = pd.MultiIndex.from_product(
            [range(2), range(2), range(2)], names=("a", "b", "c")
        )
        expl = DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
        tm.assert_frame_equal(expl, res1l)
        tm.assert_frame_equal(expl, res2r)
        expr = DataFrame([0, 0, 1, 1] * 2, index=exp_idx)
        tm.assert_frame_equal(expr, res1r)
        tm.assert_frame_equal(expr, res2l)

    def test_align_series_combinations(self):
        df = DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE"))
        s = Series([1, 2, 4], index=list("ABD"), name="x")

        # frame + series
        res1, res2 = df.align(s, axis=0)
        exp1 = DataFrame(
            {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]},
            index=list("ABCDE"),
        )
        exp2 = Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x")

        tm.assert_frame_equal(res1, exp1)
        tm.assert_series_equal(res2, exp2)

        # series + frame
        res1, res2 = s.align(df)
        tm.assert_series_equal(res1, exp2)
        tm.assert_frame_equal(res2, exp1)

    def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
        aa, ab = a.align(
            b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis
        )

        join_index, join_columns = None, None

        ea, eb = a, b
        if axis is None or axis == 0:
            join_index = a.index.join(b.index, how=how)
            ea = ea.reindex(index=join_index)
            eb = eb.reindex(index=join_index)

        if axis is None or axis == 1:
            join_columns = a.columns.join(b.columns, how=how)
            ea = ea.reindex(columns=join_columns)
            eb = eb.reindex(columns=join_columns)

        ea = ea.fillna(axis=fill_axis, method=method, limit=limit)
        eb = eb.fillna(axis=fill_axis, method=method, limit=limit)

        tm.assert_frame_equal(aa, ea)
        tm.assert_frame_equal(ab, eb)

    @pytest.mark.parametrize("meth", ["pad", "bfill"])
    @pytest.mark.parametrize("ax", [0, 1, None])
    @pytest.mark.parametrize("fax", [0, 1])
    @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"])
    def test_align_fill_method(self, how, meth, ax, fax, float_frame):
        df = float_frame
        self._check_align_fill(df, how, meth, ax, fax)

    def _check_align_fill(self, frame, kind, meth, ax, fax):
        left = frame.iloc[0:4, :10]
        right = frame.iloc[2:, 6:]
        empty = frame.iloc[:0, :0]

        self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth)
        self._check_align(
            left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
        )

        # empty left
        self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth)
        self._check_align(
            empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
        )

        # empty right
        self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth)
        self._check_align(
            left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
        )

        # both empty
        self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth)
        self._check_align(
            empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
        )
285 dist/client/pandas/tests/frame/methods/test_append.py vendored Normal file
@@ -0,0 +1,285 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    Timestamp,
    date_range,
    timedelta_range,
)
import pandas._testing as tm


class TestDataFrameAppend:
    @pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning")
    def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series):
        obj = multiindex_dataframe_random_data
        obj = tm.get_obj(obj, frame_or_series)

        a = obj[:5]
        b = obj[5:]

        result = a.append(b)
        tm.assert_equal(result, obj)

    def test_append_empty_list(self):
        # GH 28769
        df = DataFrame()
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df

        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df  # ._append() should return a new object

    def test_append_series_dict(self):
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        series = df.loc[4]
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            df._append(series, verify_integrity=True)

        series.name = None
        msg = "Can only append a Series if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series, verify_integrity=True)

        result = df._append(series[::-1], ignore_index=True)
        expected = df._append(
            DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True
        )
        tm.assert_frame_equal(result, expected)

        # dict
        result = df._append(series.to_dict(), ignore_index=True)
        tm.assert_frame_equal(result, expected)

        result = df._append(series[::-1][:3], ignore_index=True)
        expected = df._append(
            DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True
        )
        tm.assert_frame_equal(result, expected.loc[:, result.columns])

        msg = "Can only append a dict if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series.to_dict())

        # can append when name set
        row = df.loc[4]
        row.name = 5
        result = df._append(row)
        expected = df._append(df[-1:], ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_of_series_dicts(self):
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        dicts = [x.to_dict() for idx, x in df.iterrows()]

        result = df._append(dicts, ignore_index=True)
        expected = df._append(df, ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # different columns
        dicts = [
            {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4},
            {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8},
        ]
        result = df._append(dicts, ignore_index=True, sort=True)
        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_retain_index_name(self):
        df = DataFrame(
            [[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname")
        )

        serc = Series([5, 6], name="c")

        expected = DataFrame(
            [[1, 2], [3, 4], [5, 6]],
            index=pd.Index(["a", "b", "c"], name="keepthisname"),
        )

        # append series
        result = df._append(serc)
        tm.assert_frame_equal(result, expected)

        # append list of series
        result = df._append([serc])
        tm.assert_frame_equal(result, expected)

    def test_append_missing_cols(self):
        # GH22252
        # exercise the conditional branch in append method where the data
        # to be appended is a list and does not contain all columns that are in
        # the target DataFrame
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        dicts = [{"foo": 9}, {"bar": 10}]
        result = df._append(dicts, ignore_index=True, sort=True)

        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_empty_dataframe(self):

        # Empty df append empty df
        df1 = DataFrame()
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-Empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

    def test_append_dtypes(self, using_array_manager):

        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": "foo"}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]})
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        if using_array_manager:
            # TODO(ArrayManager) decide on exact casting rules in concat
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        if using_array_manager:
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": np.nan}, index=range(1))
        df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
        )
        if using_array_manager:
            # With ArrayManager, all-NaN float is not ignored
            expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"]
    )
    def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
        # GH 30238
        tz = tz_naive_fixture
        df = DataFrame([Timestamp(timestamp, tz=tz)])
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(Timestamp(timestamp, tz=tz), name=0)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "data, dtype",
        [
            ([1], pd.Int64Dtype()),
            ([1], pd.CategoricalDtype()),
            ([pd.Interval(left=0, right=5)], pd.IntervalDtype()),
            ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")),
            ([1], pd.SparseDtype()),
        ],
    )
    def test_other_dtypes(self, data, dtype):
        df = DataFrame(data, dtype=dtype)
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(data, name=0, dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_append_numpy_bug_1681(self, dtype):
        # another datetime64 bug
        if dtype == "datetime64[ns]":
            index = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        else:
            index = timedelta_range("1 days", "10 days", freq="2D")

        df = DataFrame()
        other = DataFrame({"A": "foo", "B": index}, index=index)

        result = df._append(other)
        assert (result["B"] == index).all()

    @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning")
    def test_multiindex_column_append_multiple(self):
        # GH 29699
        df = DataFrame(
            [[1, 11], [2, 12], [3, 13]],
            columns=pd.MultiIndex.from_tuples(
                [("multi", "col1"), ("multi", "col2")], names=["level1", None]
            ),
        )
        df2 = df.copy()
        for i in range(1, 10):
            df[i, "colA"] = 10
            df = df._append(df2, ignore_index=True)
            result = df["multi"]
            expected = DataFrame(
                {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)}
            )
            tm.assert_frame_equal(result, expected)

    def test_append_raises_future_warning(self):
        # GH#35407
        df1 = DataFrame([[1, 2], [3, 4]])
        df2 = DataFrame([[5, 6], [7, 8]])
        with tm.assert_produces_warning(FutureWarning):
            df1.append(df2)
198 dist/client/pandas/tests/frame/methods/test_asfreq.py vendored Normal file
@@ -0,0 +1,198 @@
from datetime import datetime

import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    date_range,
    period_range,
    to_datetime,
)
import pandas._testing as tm

from pandas.tseries import offsets


class TestAsFreq:
    def test_asfreq2(self, frame_or_series):
        ts = frame_or_series(
            [0.0, 1.0, 2.0],
            index=DatetimeIndex(
                [
                    datetime(2009, 10, 30),
                    datetime(2009, 11, 30),
                    datetime(2009, 12, 31),
                ],
                freq="BM",
            ),
        )

        daily_ts = ts.asfreq("B")
        monthly_ts = daily_ts.asfreq("BM")
        tm.assert_equal(monthly_ts, ts)

        daily_ts = ts.asfreq("B", method="pad")
        monthly_ts = daily_ts.asfreq("BM")
        tm.assert_equal(monthly_ts, ts)

        daily_ts = ts.asfreq(offsets.BDay())
        monthly_ts = daily_ts.asfreq(offsets.BMonthEnd())
        tm.assert_equal(monthly_ts, ts)

        result = ts[:0].asfreq("M")
        assert len(result) == 0
        assert result is not ts

        if frame_or_series is Series:
            daily_ts = ts.asfreq("D", fill_value=-1)
            result = daily_ts.value_counts().sort_index()
            expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
            tm.assert_series_equal(result, expected)

    def test_asfreq_datetimeindex_empty(self, frame_or_series):
        # GH#14320
        index = DatetimeIndex(["2016-09-29 11:00"])
        expected = frame_or_series(index=index, dtype=object).asfreq("H")
        result = frame_or_series([3], index=index.copy()).asfreq("H")
        tm.assert_index_equal(expected.index, result.index)

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_tz_aware_asfreq_smoke(self, tz, frame_or_series):
        dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz)

        obj = frame_or_series(np.random.randn(len(dr)), index=dr)

        # it works!
        obj.asfreq("T")

    def test_asfreq_normalize(self, frame_or_series):
        rng = date_range("1/1/2000 09:30", periods=20)
        norm = date_range("1/1/2000", periods=20)

        vals = np.random.randn(20, 3)

        obj = DataFrame(vals, index=rng)
        expected = DataFrame(vals, index=norm)
        if frame_or_series is Series:
            obj = obj[0]
            expected = expected[0]

        result = obj.asfreq("D", normalize=True)
        tm.assert_equal(result, expected)

    def test_asfreq_keep_index_name(self, frame_or_series):
        # GH#9854
        index_name = "bar"
        index = date_range("20130101", periods=20, name=index_name)
        obj = DataFrame(list(range(20)), columns=["foo"], index=index)
        obj = tm.get_obj(obj, frame_or_series)

        assert index_name == obj.index.name
        assert index_name == obj.asfreq("10D").index.name

    def test_asfreq_ts(self, frame_or_series):
        index = period_range(freq="A", start="1/1/2001", end="12/31/2010")
        obj = DataFrame(np.random.randn(len(index), 3), index=index)
        obj = tm.get_obj(obj, frame_or_series)

        result = obj.asfreq("D", how="end")
        exp_index = index.asfreq("D", how="end")
        assert len(result) == len(obj)
        tm.assert_index_equal(result.index, exp_index)

        result = obj.asfreq("D", how="start")
        exp_index = index.asfreq("D", how="start")
        assert len(result) == len(obj)
        tm.assert_index_equal(result.index, exp_index)

    def test_asfreq_resample_set_correct_freq(self, frame_or_series):
        # GH#5613
        # we test if .asfreq() and .resample() set the correct value for .freq
        dti = to_datetime(["2012-01-01", "2012-01-02", "2012-01-03"])
        obj = DataFrame({"col": [1, 2, 3]}, index=dti)
        obj = tm.get_obj(obj, frame_or_series)

        # testing the settings before calling .asfreq() and .resample()
        assert obj.index.freq is None
        assert obj.index.inferred_freq == "D"

        # does .asfreq() set .freq correctly?
        assert obj.asfreq("D").index.freq == "D"

        # does .resample() set .freq correctly?
        assert obj.resample("D").asfreq().index.freq == "D"

    def test_asfreq_empty(self, datetime_frame):
        # test does not blow up on length-0 DataFrame
        zero_length = datetime_frame.reindex([])
        result = zero_length.asfreq("BM")
        assert result is not zero_length

    def test_asfreq(self, datetime_frame):
        offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd())
        rule_monthly = datetime_frame.asfreq("BM")

        tm.assert_frame_equal(offset_monthly, rule_monthly)

        filled = rule_monthly.asfreq("B", method="pad")  # noqa
        # TODO: actually check that this worked.

        # don't forget!
        filled_dep = rule_monthly.asfreq("B", method="pad")  # noqa

    def test_asfreq_datetimeindex(self):
        df = DataFrame(
            {"A": [1, 2, 3]},
            index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)],
        )
        df = df.asfreq("B")
        assert isinstance(df.index, DatetimeIndex)

        ts = df["A"].asfreq("B")
        assert isinstance(ts.index, DatetimeIndex)

    def test_asfreq_fillvalue(self):
        # test for fill value during upsampling, related to issue 3715

        # setup
        rng = date_range("1/1/2016", periods=10, freq="2S")
        ts = Series(np.arange(len(rng)), index=rng)
        df = DataFrame({"one": ts})

        # insert pre-existing missing value
        df.loc["2016-01-01 00:00:08", "one"] = None

        actual_df = df.asfreq(freq="1S", fill_value=9.0)
        expected_df = df.asfreq(freq="1S").fillna(9.0)
        expected_df.loc["2016-01-01 00:00:08", "one"] = None
        tm.assert_frame_equal(expected_df, actual_df)

        expected_series = ts.asfreq(freq="1S").fillna(9.0)
        actual_series = ts.asfreq(freq="1S", fill_value=9.0)
        tm.assert_series_equal(expected_series, actual_series)

    def test_asfreq_with_date_object_index(self, frame_or_series):
        rng = date_range("1/1/2000", periods=20)
        ts = frame_or_series(np.random.randn(20), index=rng)

        ts2 = ts.copy()
        ts2.index = [x.date() for x in ts2.index]

        result = ts2.asfreq("4H", method="ffill")
        expected = ts.asfreq("4H", method="ffill")
        tm.assert_equal(result, expected)

    def test_asfreq_with_unsorted_index(self, frame_or_series):
        # GH#39805
        # Test that rows are not dropped when the datetime index is out of order
        index = to_datetime(["2021-01-04", "2021-01-02", "2021-01-03", "2021-01-01"])
        result = frame_or_series(range(4), index=index)

        expected = result.reindex(sorted(index))
        expected.index = expected.index._with_freq("infer")

        result = result.asfreq("D")
        tm.assert_equal(result, expected)
182 dist/client/pandas/tests/frame/methods/test_asof.py vendored Normal file
@@ -0,0 +1,182 @@
import numpy as np
import pytest

from pandas._libs.tslibs import IncompatibleFrequency

from pandas import (
    DataFrame,
    Period,
    Series,
    Timestamp,
    date_range,
    period_range,
    to_datetime,
)
import pandas._testing as tm


@pytest.fixture
def date_range_frame():
    """
    Fixture for DataFrame of ints with date_range index

    Columns are ['A', 'B'].
    """
    N = 50
    rng = date_range("1/1/1990", periods=N, freq="53s")
    return DataFrame({"A": np.arange(N), "B": np.arange(N)}, index=rng)


class TestFrameAsof:
    def test_basic(self, date_range_frame):
        df = date_range_frame
        N = 50
        df.loc[df.index[15:30], "A"] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")

        result = df.asof(dates)
        assert result.notna().all(1).all()
        lb = df.index[14]
        ub = df.index[30]

        dates = list(dates)

        result = df.asof(dates)
        assert result.notna().all(1).all()

        mask = (result.index >= lb) & (result.index < ub)
        rs = result[mask]
        assert (rs == 14).all(1).all()

    def test_subset(self, date_range_frame):
        N = 10
        df = date_range_frame.iloc[:N].copy()
        df.loc[df.index[4:8], "A"] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")

        # with a subset of A should be the same
        result = df.asof(dates, subset="A")
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # same with A/B
        result = df.asof(dates, subset=["A", "B"])
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # B gives df.asof
        result = df.asof(dates, subset="B")
        expected = df.resample("25s", closed="right").ffill().reindex(dates)
        expected.iloc[20:] = 9

        tm.assert_frame_equal(result, expected)

    def test_missing(self, date_range_frame):
        # GH 15118
        # no match found - `where` value before earliest date in index
        N = 10
        df = date_range_frame.iloc[:N].copy()

        result = df.asof("1989-12-31")

        expected = Series(
            index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64
        )
        tm.assert_series_equal(result, expected)

        result = df.asof(to_datetime(["1989-12-31"]))
        expected = DataFrame(
            index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64"
        )
        tm.assert_frame_equal(result, expected)

        # Check that we handle PeriodIndex correctly, dont end up with
        # period.ordinal for series name
        df = df.to_period("D")
        result = df.asof("1989-12-31")
        assert isinstance(result.name, Period)

    def test_asof_all_nans(self, frame_or_series):
        # GH 15713
        # DataFrame/Series is all nans
        result = frame_or_series([np.nan]).asof([0])
        expected = frame_or_series([np.nan])
        tm.assert_equal(result, expected)

    def test_all_nans(self, date_range_frame):
        # GH 15713
        # DataFrame is all nans

        # testing non-default indexes, multiple inputs
        N = 150
        rng = date_range_frame.index
        dates = date_range("1/1/1990", periods=N, freq="25s")
        result = DataFrame(np.nan, index=rng, columns=["A"]).asof(dates)
        expected = DataFrame(np.nan, index=dates, columns=["A"])
        tm.assert_frame_equal(result, expected)

        # testing multiple columns
        dates = date_range("1/1/1990", periods=N, freq="25s")
        result = DataFrame(np.nan, index=rng, columns=["A", "B", "C"]).asof(dates)
        expected = DataFrame(np.nan, index=dates, columns=["A", "B", "C"])
        tm.assert_frame_equal(result, expected)

        # testing scalar input
        result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof([3])
        expected = DataFrame(np.nan, index=[3], columns=["A", "B"])
        tm.assert_frame_equal(result, expected)

        result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof(3)
        expected = Series(np.nan, index=["A", "B"], name=3)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "stamp,expected",
        [
            (
                Timestamp("2018-01-01 23:22:43.325+00:00"),
                Series(2.0, name=Timestamp("2018-01-01 23:22:43.325+00:00")),
            ),
            (
                Timestamp("2018-01-01 22:33:20.682+01:00"),
                Series(1.0, name=Timestamp("2018-01-01 22:33:20.682+01:00")),
            ),
        ],
    )
    def test_time_zone_aware_index(self, stamp, expected):
        # GH21194
        # Testing awareness of DataFrame index considering different
        # UTC and timezone
        df = DataFrame(
            data=[1, 2],
            index=[
                Timestamp("2018-01-01 21:00:05.001+00:00"),
                Timestamp("2018-01-01 22:35:10.550+00:00"),
            ],
        )

        result = df.asof(stamp)
        tm.assert_series_equal(result, expected)

    def test_is_copy(self, date_range_frame):
        # GH-27357, GH-30784: ensure the result of asof is an actual copy and
        # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
        df = date_range_frame
        N = 50
        df.loc[df.index[15:30], "A"] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")

        result = df.asof(dates)

        with tm.assert_produces_warning(None):
            result["C"] = 1

    def test_asof_periodindex_mismatched_freq(self):
        N = 50
        rng = period_range("1/1/1990", periods=N, freq="H")
        df = DataFrame(np.random.randn(N), index=rng)

        # Mismatched freq
        msg = "Input has different freq"
        with pytest.raises(IncompatibleFrequency, match=msg):
            df.asof(rng.asfreq("D"))
84
dist/client/pandas/tests/frame/methods/test_assign.py
vendored
Normal file
84
dist/client/pandas/tests/frame/methods/test_assign.py
vendored
Normal file
@@ -0,0 +1,84 @@
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||


class TestAssign:
    def test_assign(self):
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        original = df.copy()
        result = df.assign(C=df.B / df.A)
        expected = df.copy()
        expected["C"] = [4, 2.5, 2]
        tm.assert_frame_equal(result, expected)

        # lambda syntax
        result = df.assign(C=lambda x: x.B / x.A)
        tm.assert_frame_equal(result, expected)

        # original is unmodified
        tm.assert_frame_equal(df, original)

        # Non-Series array-like
        result = df.assign(C=[4, 2.5, 2])
        tm.assert_frame_equal(result, expected)
        # original is unmodified
        tm.assert_frame_equal(df, original)

        result = df.assign(B=df.B / df.A)
        expected = expected.drop("B", axis=1).rename(columns={"C": "B"})
        tm.assert_frame_equal(result, expected)

        # overwrite
        result = df.assign(A=df.A + df.B)
        expected = df.copy()
        expected["A"] = [5, 7, 9]
        tm.assert_frame_equal(result, expected)

        # lambda
        result = df.assign(A=lambda x: x.A + x.B)
        tm.assert_frame_equal(result, expected)

    def test_assign_multiple(self):
        df = DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"])
        result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B)
        expected = DataFrame(
            [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE")
        )
        tm.assert_frame_equal(result, expected)

    def test_assign_order(self):
        # GH 9818
        df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
        result = df.assign(D=df.A + df.B, C=df.A - df.B)

        expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC"))
        tm.assert_frame_equal(result, expected)
        result = df.assign(C=df.A - df.B, D=df.A + df.B)

        expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD"))

        tm.assert_frame_equal(result, expected)

    def test_assign_bad(self):
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        # non-keyword argument
        msg = r"assign\(\) takes 1 positional argument but 2 were given"
        with pytest.raises(TypeError, match=msg):
            df.assign(lambda x: x.A)
        msg = "'DataFrame' object has no attribute 'C'"
        with pytest.raises(AttributeError, match=msg):
            df.assign(C=df.A, D=df.A + df.C)

    def test_assign_dependent(self):
        df = DataFrame({"A": [1, 2], "B": [3, 4]})

        result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"])
        expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD"))
        tm.assert_frame_equal(result, expected)

        result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"])
        expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD"))
        tm.assert_frame_equal(result, expected)
789
dist/client/pandas/tests/frame/methods/test_astype.py
vendored
Normal file
@@ -0,0 +1,789 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
DataFrame,
|
||||
DatetimeTZDtype,
|
||||
Interval,
|
||||
IntervalDtype,
|
||||
NaT,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
concat,
|
||||
date_range,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import UInt64Index
|
||||
from pandas.core.arrays.integer import coerce_to_array
|
||||
|
||||
|
||||
def _check_cast(df, v):
|
||||
"""
|
||||
Check if all dtypes of df are equal to v
|
||||
"""
|
||||
assert all(s.dtype.name == v for _, s in df.items())
|
||||
|
||||
|
||||
class TestAstype:
|
||||
def test_astype_float(self, float_frame):
|
||||
casted = float_frame.astype(int)
|
||||
expected = DataFrame(
|
||||
float_frame.values.astype(int),
|
||||
index=float_frame.index,
|
||||
columns=float_frame.columns,
|
||||
)
|
||||
tm.assert_frame_equal(casted, expected)
|
||||
|
||||
casted = float_frame.astype(np.int32)
|
||||
expected = DataFrame(
|
||||
float_frame.values.astype(np.int32),
|
||||
index=float_frame.index,
|
||||
columns=float_frame.columns,
|
||||
)
|
||||
tm.assert_frame_equal(casted, expected)
|
||||
|
||||
float_frame["foo"] = "5"
|
||||
casted = float_frame.astype(int)
|
||||
expected = DataFrame(
|
||||
float_frame.values.astype(int),
|
||||
index=float_frame.index,
|
||||
columns=float_frame.columns,
|
||||
)
|
||||
tm.assert_frame_equal(casted, expected)
|
||||
|
||||
def test_astype_mixed_float(self, mixed_float_frame):
|
||||
# mixed casting
|
||||
casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float32")
|
||||
_check_cast(casted, "float32")
|
||||
|
||||
casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float16")
|
||||
_check_cast(casted, "float16")
|
||||
|
||||
def test_astype_mixed_type(self, mixed_type_frame):
|
||||
# mixed casting
|
||||
mn = mixed_type_frame._get_numeric_data().copy()
|
||||
mn["little_float"] = np.array(12345.0, dtype="float16")
|
||||
mn["big_float"] = np.array(123456789101112.0, dtype="float64")
|
||||
|
||||
casted = mn.astype("float64")
|
||||
_check_cast(casted, "float64")
|
||||
|
||||
casted = mn.astype("int64")
|
||||
_check_cast(casted, "int64")
|
||||
|
||||
casted = mn.reindex(columns=["little_float"]).astype("float16")
|
||||
_check_cast(casted, "float16")
|
||||
|
||||
casted = mn.astype("float32")
|
||||
_check_cast(casted, "float32")
|
||||
|
||||
casted = mn.astype("int32")
|
||||
_check_cast(casted, "int32")
|
||||
|
||||
# to object
|
||||
casted = mn.astype("O")
|
||||
_check_cast(casted, "object")
|
||||
|
||||
def test_astype_with_exclude_string(self, float_frame):
|
||||
df = float_frame.copy()
|
||||
expected = float_frame.astype(int)
|
||||
df["string"] = "foo"
|
||||
casted = df.astype(int, errors="ignore")
|
||||
|
||||
expected["string"] = "foo"
|
||||
tm.assert_frame_equal(casted, expected)
|
||||
|
||||
df = float_frame.copy()
|
||||
expected = float_frame.astype(np.int32)
|
||||
df["string"] = "foo"
|
||||
casted = df.astype(np.int32, errors="ignore")
|
||||
|
||||
expected["string"] = "foo"
|
||||
tm.assert_frame_equal(casted, expected)
|
||||
|
||||
def test_astype_with_view_float(self, float_frame):
|
||||
|
||||
# this is the only real reason to do it this way
|
||||
tf = np.round(float_frame).astype(np.int32)
|
||||
casted = tf.astype(np.float32, copy=False)
|
||||
|
||||
# TODO(wesm): verification?
|
||||
tf = float_frame.astype(np.float64)
|
||||
casted = tf.astype(np.int64, copy=False) # noqa
|
||||
|
||||
def test_astype_with_view_mixed_float(self, mixed_float_frame):
|
||||
|
||||
tf = mixed_float_frame.reindex(columns=["A", "B", "C"])
|
||||
|
||||
casted = tf.astype(np.int64)
|
||||
casted = tf.astype(np.float32) # noqa
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
|
||||
@pytest.mark.parametrize("val", [np.nan, np.inf])
|
||||
def test_astype_cast_nan_inf_int(self, val, dtype):
|
||||
# see GH#14265
|
||||
#
|
||||
# Check NaN and inf --> raise error when converting to int.
|
||||
msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
|
||||
df = DataFrame([val])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.astype(dtype)
|
||||
|
||||
def test_astype_str(self):
|
||||
# see GH#9757
|
||||
a = Series(date_range("2010-01-04", periods=5))
|
||||
b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern"))
|
||||
c = Series([Timedelta(x, unit="d") for x in range(5)])
|
||||
d = Series(range(5))
|
||||
e = Series([0.0, 0.2, 0.4, 0.6, 0.8])
|
||||
|
||||
df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e})
|
||||
|
||||
# Datetime-like
|
||||
result = df.astype(str)
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))),
|
||||
"b": list(map(str, map(Timestamp, b._values))),
|
||||
"c": list(map(lambda x: Timedelta(x)._repr_base(), c._values)),
|
||||
"d": list(map(str, d._values)),
|
||||
"e": list(map(str, e._values)),
|
||||
}
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_astype_str_float(self):
|
||||
# see GH#11302
|
||||
result = DataFrame([np.NaN]).astype(str)
|
||||
expected = DataFrame(["nan"])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = DataFrame([1.12345678901234567890]).astype(str)
|
||||
|
||||
val = "1.1234567890123457"
|
||||
expected = DataFrame([val])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype_class", [dict, Series])
|
||||
def test_astype_dict_like(self, dtype_class):
|
||||
# GH7271 & GH16717
|
||||
a = Series(date_range("2010-01-04", periods=5))
|
||||
b = Series(range(5))
|
||||
c = Series([0.0, 0.2, 0.4, 0.6, 0.8])
|
||||
d = Series(["1.0", "2", "3.14", "4", "5.4"])
|
||||
df = DataFrame({"a": a, "b": b, "c": c, "d": d})
|
||||
original = df.copy(deep=True)
|
||||
|
||||
# change type of a subset of columns
|
||||
dt1 = dtype_class({"b": "str", "d": "float32"})
|
||||
result = df.astype(dt1)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"a": a,
|
||||
"b": Series(["0", "1", "2", "3", "4"]),
|
||||
"c": c,
|
||||
"d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(df, original)
|
||||
|
||||
dt2 = dtype_class({"b": np.float32, "c": "float32", "d": np.float64})
|
||||
result = df.astype(dt2)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"a": a,
|
||||
"b": Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype="float32"),
|
||||
"c": Series([0.0, 0.2, 0.4, 0.6, 0.8], dtype="float32"),
|
||||
"d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float64"),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(df, original)
|
||||
|
||||
# change all columns
|
||||
dt3 = dtype_class({"a": str, "b": str, "c": str, "d": str})
|
||||
tm.assert_frame_equal(df.astype(dt3), df.astype(str))
|
||||
tm.assert_frame_equal(df, original)
|
||||
|
||||
# error should be raised when using something other than column labels
|
||||
# in the keys of the dtype dict
|
||||
dt4 = dtype_class({"b": str, 2: str})
|
||||
dt5 = dtype_class({"e": str})
|
||||
msg_frame = (
|
||||
"Only a column name can be used for the key in a dtype mappings argument. "
|
||||
"'{}' not found in columns."
|
||||
)
|
||||
with pytest.raises(KeyError, match=msg_frame.format(2)):
|
||||
df.astype(dt4)
|
||||
with pytest.raises(KeyError, match=msg_frame.format("e")):
|
||||
df.astype(dt5)
|
||||
tm.assert_frame_equal(df, original)
|
||||
|
||||
# if the dtypes provided are the same as the original dtypes, the
|
||||
# resulting DataFrame should be the same as the original DataFrame
|
||||
dt6 = dtype_class({col: df[col].dtype for col in df.columns})
|
||||
equiv = df.astype(dt6)
|
||||
tm.assert_frame_equal(df, equiv)
|
||||
tm.assert_frame_equal(df, original)
|
||||
|
||||
# GH#16717
|
||||
# if dtypes provided is empty, the resulting DataFrame
|
||||
# should be the same as the original DataFrame
|
||||
dt7 = dtype_class({}) if dtype_class is dict else dtype_class({}, dtype=object)
|
||||
equiv = df.astype(dt7)
|
||||
tm.assert_frame_equal(df, equiv)
|
||||
tm.assert_frame_equal(df, original)
|
||||
|
||||
def test_astype_duplicate_col(self):
|
||||
a1 = Series([1, 2, 3, 4, 5], name="a")
|
||||
b = Series([0.1, 0.2, 0.4, 0.6, 0.8], name="b")
|
||||
a2 = Series([0, 1, 2, 3, 4], name="a")
|
||||
df = concat([a1, b, a2], axis=1)
|
||||
|
||||
result = df.astype(str)
|
||||
a1_str = Series(["1", "2", "3", "4", "5"], dtype="str", name="a")
|
||||
b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b")
|
||||
a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a")
|
||||
expected = concat([a1_str, b_str, a2_str], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.astype({"a": "str"})
|
||||
expected = concat([a1_str, b, a2_str], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_astype_duplicate_col_series_arg(self):
|
||||
# GH#44417
|
||||
vals = np.random.randn(3, 4)
|
||||
df = DataFrame(vals, columns=["A", "B", "C", "A"])
|
||||
dtypes = df.dtypes
|
||||
dtypes.iloc[0] = str
|
||||
dtypes.iloc[2] = "Float64"
|
||||
|
||||
result = df.astype(dtypes)
|
||||
expected = DataFrame(
|
||||
{
|
||||
0: vals[:, 0].astype(str),
|
||||
1: vals[:, 1],
|
||||
2: pd.array(vals[:, 2], dtype="Float64"),
|
||||
3: vals[:, 3],
|
||||
}
|
||||
)
|
||||
expected.columns = df.columns
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype",
|
||||
[
|
||||
"category",
|
||||
CategoricalDtype(),
|
||||
CategoricalDtype(ordered=True),
|
||||
CategoricalDtype(ordered=False),
|
||||
CategoricalDtype(categories=list("abcdef")),
|
||||
CategoricalDtype(categories=list("edba"), ordered=False),
|
||||
CategoricalDtype(categories=list("edcb"), ordered=True),
|
||||
],
|
||||
ids=repr,
|
||||
)
|
||||
def test_astype_categorical(self, dtype):
|
||||
# GH#18099
|
||||
d = {"A": list("abbc"), "B": list("bccd"), "C": list("cdde")}
|
||||
df = DataFrame(d)
|
||||
result = df.astype(dtype)
|
||||
expected = DataFrame({k: Categorical(d[k], dtype=dtype) for k in d})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("cls", [CategoricalDtype, DatetimeTZDtype, IntervalDtype])
|
||||
def test_astype_categoricaldtype_class_raises(self, cls):
|
||||
df = DataFrame({"A": ["a", "a", "b", "c"]})
|
||||
xpr = f"Expected an instance of {cls.__name__}"
|
||||
with pytest.raises(TypeError, match=xpr):
|
||||
df.astype({"A": cls})
|
||||
|
||||
with pytest.raises(TypeError, match=xpr):
|
||||
df["A"].astype(cls)
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"])
|
||||
def test_astype_extension_dtypes(self, dtype):
|
||||
# GH#22578
|
||||
df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"])
|
||||
|
||||
expected1 = DataFrame(
|
||||
{
|
||||
"a": pd.array([1, 3, 5], dtype=dtype),
|
||||
"b": pd.array([2, 4, 6], dtype=dtype),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(df.astype(dtype), expected1)
|
||||
tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1)
|
||||
tm.assert_frame_equal(df.astype(dtype).astype("float64"), df)
|
||||
|
||||
df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"])
|
||||
df["b"] = df["b"].astype(dtype)
|
||||
expected2 = DataFrame(
|
||||
{"a": [1.0, 3.0, 5.0], "b": pd.array([2, 4, 6], dtype=dtype)}
|
||||
)
|
||||
tm.assert_frame_equal(df, expected2)
|
||||
|
||||
tm.assert_frame_equal(df.astype(dtype), expected1)
|
||||
tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1)
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"])
|
||||
def test_astype_extension_dtypes_1d(self, dtype):
|
||||
# GH#22578
|
||||
df = DataFrame({"a": [1.0, 2.0, 3.0]})
|
||||
|
||||
expected1 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)})
|
||||
tm.assert_frame_equal(df.astype(dtype), expected1)
|
||||
tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1)
|
||||
|
||||
df = DataFrame({"a": [1.0, 2.0, 3.0]})
|
||||
df["a"] = df["a"].astype(dtype)
|
||||
expected2 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)})
|
||||
tm.assert_frame_equal(df, expected2)
|
||||
|
||||
tm.assert_frame_equal(df.astype(dtype), expected1)
|
||||
tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1)
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["category", "Int64"])
|
||||
def test_astype_extension_dtypes_duplicate_col(self, dtype):
|
||||
# GH#24704
|
||||
a1 = Series([0, np.nan, 4], name="a")
|
||||
a2 = Series([np.nan, 3, 5], name="a")
|
||||
df = concat([a1, a2], axis=1)
|
||||
|
||||
result = df.astype(dtype)
|
||||
expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype", [{100: "float64", 200: "uint64"}, "category", "float64"]
|
||||
)
|
||||
def test_astype_column_metadata(self, dtype):
|
||||
# GH#19920
|
||||
columns = UInt64Index([100, 200, 300], name="foo")
|
||||
df = DataFrame(np.arange(15).reshape(5, 3), columns=columns)
|
||||
df = df.astype(dtype)
|
||||
tm.assert_index_equal(df.columns, columns)
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["M8", "m8"])
|
||||
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
|
||||
def test_astype_from_datetimelike_to_object(self, dtype, unit):
|
||||
# tests astype to object dtype
|
||||
# GH#19223 / GH#12425
|
||||
dtype = f"{dtype}[{unit}]"
|
||||
arr = np.array([[1, 2, 3]], dtype=dtype)
|
||||
df = DataFrame(arr)
|
||||
result = df.astype(object)
|
||||
assert (result.dtypes == object).all()
|
||||
|
||||
if dtype.startswith("M8"):
|
||||
assert result.iloc[0, 0] == Timestamp(1, unit=unit)
|
||||
else:
|
||||
assert result.iloc[0, 0] == Timedelta(1, unit=unit)
|
||||
|
||||
@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
|
||||
@pytest.mark.parametrize("dtype", ["M8", "m8"])
|
||||
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
|
||||
def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
|
||||
# tests all units from numeric origination
|
||||
# GH#19223 / GH#12425
|
||||
dtype = f"{dtype}[{unit}]"
|
||||
arr = np.array([[1, 2, 3]], dtype=arr_dtype)
|
||||
df = DataFrame(arr)
|
||||
result = df.astype(dtype)
|
||||
expected = DataFrame(arr.astype(dtype))
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
|
||||
def test_astype_to_datetime_unit(self, unit):
|
||||
# tests all units from datetime origination
|
||||
# GH#19223
|
||||
dtype = f"M8[{unit}]"
|
||||
arr = np.array([[1, 2, 3]], dtype=dtype)
|
||||
df = DataFrame(arr)
|
||||
result = df.astype(dtype)
|
||||
expected = DataFrame(arr.astype(dtype))
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("unit", ["ns"])
|
||||
def test_astype_to_timedelta_unit_ns(self, unit):
|
||||
# preserve the timedelta conversion
|
||||
# GH#19223
|
||||
dtype = f"m8[{unit}]"
|
||||
arr = np.array([[1, 2, 3]], dtype=dtype)
|
||||
df = DataFrame(arr)
|
||||
result = df.astype(dtype)
|
||||
expected = DataFrame(arr.astype(dtype))
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"])
|
||||
def test_astype_to_timedelta_unit(self, unit):
|
||||
# coerce to float
|
||||
# GH#19223
|
||||
dtype = f"m8[{unit}]"
|
||||
arr = np.array([[1, 2, 3]], dtype=dtype)
|
||||
df = DataFrame(arr)
|
||||
result = df.astype(dtype)
|
||||
expected = DataFrame(df.values.astype(dtype).astype(float))
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
|
||||
def test_astype_to_incorrect_datetimelike(self, unit):
|
||||
# trying to astype a m to a M, or vice-versa
|
||||
# GH#19224
|
||||
dtype = f"M8[{unit}]"
|
||||
other = f"m8[{unit}]"
|
||||
|
||||
df = DataFrame(np.array([[1, 2, 3]], dtype=dtype))
|
||||
msg = "|".join(
|
||||
[
|
||||
# BlockManager path
|
||||
rf"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]",
|
||||
# ArrayManager path
|
||||
"cannot astype a datetimelike from "
|
||||
rf"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]",
|
||||
]
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.astype(other)
|
||||
|
||||
msg = "|".join(
|
||||
[
|
||||
# BlockManager path
|
||||
rf"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]",
|
||||
# ArrayManager path
|
||||
"cannot astype a timedelta from "
|
||||
rf"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]",
|
||||
]
|
||||
)
|
||||
df = DataFrame(np.array([[1, 2, 3]], dtype=other))
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.astype(dtype)
|
||||
|
||||
def test_astype_arg_for_errors(self):
|
||||
# GH#14878
|
||||
|
||||
df = DataFrame([1, 2, 3])
|
||||
|
||||
msg = (
|
||||
"Expected value of kwarg 'errors' to be one of "
|
||||
"['raise', 'ignore']. Supplied value is 'True'"
|
||||
)
|
||||
with pytest.raises(ValueError, match=re.escape(msg)):
|
||||
df.astype(np.float64, errors=True)
|
||||
|
||||
df.astype(np.int8, errors="ignore")
|
||||
|
||||
def test_astype_arg_for_errors_dictlist(self):
|
||||
# GH#25905
|
||||
df = DataFrame(
|
||||
[
|
||||
{"a": "1", "b": "16.5%", "c": "test"},
|
||||
{"a": "2.2", "b": "15.3", "c": "another_test"},
|
||||
]
|
||||
)
|
||||
expected = DataFrame(
|
||||
[
|
||||
{"a": 1.0, "b": "16.5%", "c": "test"},
|
||||
{"a": 2.2, "b": "15.3", "c": "another_test"},
|
||||
]
|
||||
)
|
||||
type_dict = {"a": "float64", "b": "float64", "c": "object"}
|
||||
|
||||
result = df.astype(dtype=type_dict, errors="ignore")
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_astype_dt64tz(self, timezone_frame):
|
||||
# astype
|
||||
expected = np.array(
|
||||
[
|
||||
[
|
||||
Timestamp("2013-01-01 00:00:00"),
|
||||
Timestamp("2013-01-02 00:00:00"),
|
||||
Timestamp("2013-01-03 00:00:00"),
|
||||
],
|
||||
[
|
||||
Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"),
|
||||
NaT,
|
||||
Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"),
|
||||
],
|
||||
[
|
||||
Timestamp("2013-01-01 00:00:00+0100", tz="CET"),
|
||||
NaT,
|
||||
Timestamp("2013-01-03 00:00:00+0100", tz="CET"),
|
||||
],
|
||||
],
|
||||
dtype=object,
|
||||
).T
|
||||
expected = DataFrame(
|
||||
expected,
|
||||
index=timezone_frame.index,
|
||||
columns=timezone_frame.columns,
|
||||
dtype=object,
|
||||
)
|
||||
result = timezone_frame.astype(object)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# dt64tz->dt64 deprecated
|
||||
result = timezone_frame.astype("datetime64[ns]")
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": date_range("20130101", periods=3),
|
||||
"B": (
|
||||
date_range("20130101", periods=3, tz="US/Eastern")
|
||||
.tz_convert("UTC")
|
||||
.tz_localize(None)
|
||||
),
|
||||
"C": (
|
||||
date_range("20130101", periods=3, tz="CET")
|
||||
.tz_convert("UTC")
|
||||
.tz_localize(None)
|
||||
),
|
||||
}
|
||||
)
|
||||
expected.iloc[1, 1] = NaT
|
||||
expected.iloc[1, 2] = NaT
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_astype_dt64tz_to_str(self, timezone_frame):
|
||||
# str formatting
|
||||
result = timezone_frame.astype(str)
|
||||
expected = DataFrame(
|
||||
[
|
||||
[
|
||||
"2013-01-01",
|
||||
"2013-01-01 00:00:00-05:00",
|
||||
"2013-01-01 00:00:00+01:00",
|
||||
],
|
||||
["2013-01-02", "NaT", "NaT"],
|
||||
[
|
||||
"2013-01-03",
|
||||
"2013-01-03 00:00:00-05:00",
|
||||
"2013-01-03 00:00:00+01:00",
|
||||
],
|
||||
],
|
||||
columns=timezone_frame.columns,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with option_context("display.max_columns", 20):
|
||||
result = str(timezone_frame)
|
||||
assert (
|
||||
"0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00"
|
||||
) in result
|
||||
assert (
|
||||
"1 2013-01-02 NaT NaT"
|
||||
) in result
|
||||
assert (
|
||||
"2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00"
|
||||
) in result
|
||||
|
||||
def test_astype_empty_dtype_dict(self):
|
||||
# issue mentioned further down in the following issue's thread
|
||||
# https://github.com/pandas-dev/pandas/issues/33113
|
||||
df = DataFrame()
|
||||
result = df.astype({})
|
||||
tm.assert_frame_equal(result, df)
|
||||
assert result is not df
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, dtype",
|
||||
[
|
||||
(["x", "y", "z"], "string[python]"),
|
||||
pytest.param(
|
||||
["x", "y", "z"],
|
||||
"string[pyarrow]",
|
||||
marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
|
||||
),
|
||||
(["x", "y", "z"], "category"),
|
||||
(3 * [Timestamp("2020-01-01", tz="UTC")], None),
|
||||
(3 * [Interval(0, 1)], None),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("errors", ["raise", "ignore"])
|
||||
def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
|
||||
# https://github.com/pandas-dev/pandas/issues/35471
|
||||
df = DataFrame(Series(data, dtype=dtype))
|
||||
if errors == "ignore":
|
||||
expected = df
|
||||
result = df.astype(float, errors=errors)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
else:
|
||||
msg = "(Cannot cast)|(could not convert)"
|
||||
with pytest.raises((ValueError, TypeError), match=msg):
|
||||
df.astype(float, errors=errors)
|
||||
|
||||
def test_astype_tz_conversion(self):
|
||||
# GH 35973
|
||||
val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")}
|
||||
df = DataFrame(val)
|
||||
result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"})
|
||||
|
||||
expected = df
|
||||
expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"])
|
||||
def test_astype_tz_object_conversion(self, tz):
|
||||
# GH 35973
|
||||
val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")}
|
||||
expected = DataFrame(val)
|
||||
|
||||
# convert expected to object dtype from other tz str (independently tested)
|
||||
result = expected.astype({"tz": f"datetime64[ns, {tz}]"})
|
||||
result = result.astype({"tz": "object"})
|
||||
|
||||
# do real test: object dtype to a specified tz, different from construction tz.
|
||||
result = result.astype({"tz": "datetime64[ns, Europe/London]"})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture):
|
||||
# GH#41409
|
||||
tz = tz_naive_fixture
|
||||
|
||||
dti = date_range("2016-01-01", periods=3, tz=tz)
|
||||
dta = dti._data
|
||||
dta[0] = NaT
|
||||
|
||||
obj = frame_or_series(dta)
|
||||
result = obj.astype("string")
|
||||
|
||||
# Check that Series/DataFrame.astype matches DatetimeArray.astype
|
||||
expected = frame_or_series(dta.astype("string"))
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
item = result.iloc[0]
|
||||
if frame_or_series is DataFrame:
|
||||
item = item.iloc[0]
|
||||
assert item is pd.NA
|
||||
|
||||
# For non-NA values, we should match what we get for non-EA str
|
||||
alt = obj.astype(str)
|
||||
assert np.all(alt.iloc[1:] == result.iloc[1:])
|
||||
|
||||
def test_astype_td64_to_string(self, frame_or_series):
|
||||
# GH#41409
|
||||
tdi = pd.timedelta_range("1 Day", periods=3)
|
||||
obj = frame_or_series(tdi)
|
||||
|
||||
expected = frame_or_series(["1 days", "2 days", "3 days"], dtype="string")
|
||||
result = obj.astype("string")
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_astype_bytes(self):
|
||||
# GH#39474
|
||||
result = DataFrame(["foo", "bar", "baz"]).astype(bytes)
|
||||
assert result.dtypes[0] == np.dtype("S3")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index_slice",
|
||||
[
|
||||
np.s_[:2, :2],
|
||||
np.s_[:1, :2],
|
||||
np.s_[:2, :1],
|
||||
np.s_[::2, ::2],
|
||||
np.s_[::1, ::2],
|
||||
np.s_[::2, ::1],
|
||||
],
|
||||
)
|
||||
def test_astype_noncontiguous(self, index_slice):
|
||||
# GH#42396
|
||||
data = np.arange(16).reshape(4, 4)
|
||||
df = DataFrame(data)
|
||||
|
||||
result = df.iloc[index_slice].astype("int16")
|
||||
expected = df.iloc[index_slice]
|
||||
tm.assert_frame_equal(result, expected, check_dtype=False)
|
||||
|
||||
def test_astype_retain_attrs(self, any_numpy_dtype):
|
||||
# GH#44414
|
||||
df = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]})
|
||||
df.attrs["Location"] = "Michigan"
|
||||
|
||||
result = df.astype({"a": any_numpy_dtype}).attrs
|
||||
expected = df.attrs
|
||||
|
||||
tm.assert_dict_equal(expected, result)
|
||||
|
||||
|
||||
class TestAstypeCategorical:
|
||||
def test_astype_from_categorical3(self):
|
||||
df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]})
|
||||
cats = Categorical([1, 2, 3, 4, 5, 6])
|
||||
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
|
||||
df["cats"] = df["cats"].astype("category")
|
||||
tm.assert_frame_equal(exp_df, df)
|
||||
|
||||
def test_astype_from_categorical4(self):
|
||||
df = DataFrame(
|
||||
{"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]}
|
||||
)
|
||||
cats = Categorical(["a", "b", "b", "a", "a", "d"])
|
||||
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
|
||||
df["cats"] = df["cats"].astype("category")
|
||||
tm.assert_frame_equal(exp_df, df)
|
||||
|
||||
def test_categorical_astype_to_int(self, any_int_dtype):
|
||||
# GH#39402
|
||||
|
||||
df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])})
|
||||
df.col1 = df.col1.astype("category")
|
||||
df.col1 = df.col1.astype(any_int_dtype)
|
||||
expected = DataFrame({"col1": pd.array([2, 1, 3], dtype=any_int_dtype)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_astype_categorical_to_string_missing(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/41797
|
||||
df = DataFrame(["a", "b", np.nan])
|
||||
expected = df.astype(str)
|
||||
cat = df.astype("category")
|
||||
result = cat.astype(str)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class IntegerArrayNoCopy(pd.core.arrays.IntegerArray):
|
||||
# GH 42501
|
||||
|
||||
@classmethod
|
||||
def _from_sequence(cls, scalars, *, dtype=None, copy=False):
|
||||
values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy)
|
||||
return IntegerArrayNoCopy(values, mask)
|
||||
|
||||
def copy(self):
|
||||
assert False
|
||||
|
||||
|
||||
class Int16DtypeNoCopy(pd.Int16Dtype):
|
||||
# GH 42501
|
||||
|
||||
@classmethod
|
||||
def construct_array_type(cls):
|
||||
return IntegerArrayNoCopy
|
||||
|
||||
|
||||
def test_frame_astype_no_copy():
|
||||
# GH 42501
|
||||
df = DataFrame({"a": [1, 4, None, 5], "b": [6, 7, 8, 9]}, dtype=object)
|
||||
result = df.astype({"a": Int16DtypeNoCopy()}, copy=False)
|
||||
|
||||
assert result.a.dtype == pd.Int16Dtype()
|
||||
assert np.shares_memory(df.b.values, result.b.values)
|
||||
124
dist/client/pandas/tests/frame/methods/test_at_time.py
vendored
Normal file
@@ -0,0 +1,124 @@
from datetime import time

import numpy as np
import pytest
import pytz

from pandas._libs.tslibs import timezones

from pandas import (
    DataFrame,
    date_range,
)
import pandas._testing as tm


class TestAtTime:
    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_localized_at_time(self, tzstr, frame_or_series):
        tz = timezones.maybe_get_tz(tzstr)

        rng = date_range("4/16/2012", "5/1/2012", freq="H")
        ts = frame_or_series(np.random.randn(len(rng)), index=rng)

        ts_local = ts.tz_localize(tzstr)

        result = ts_local.at_time(time(10, 0))
        expected = ts.at_time(time(10, 0)).tz_localize(tzstr)
        tm.assert_equal(result, expected)
        assert timezones.tz_compare(result.index.tz, tz)

    def test_at_time(self, frame_or_series):
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        ts = tm.get_obj(ts, frame_or_series)
        rs = ts.at_time(rng[1])
        assert (rs.index.hour == rng[1].hour).all()
        assert (rs.index.minute == rng[1].minute).all()
        assert (rs.index.second == rng[1].second).all()

        result = ts.at_time("9:30")
        expected = ts.at_time(time(9, 30))
        tm.assert_equal(result, expected)

    def test_at_time_midnight(self, frame_or_series):
        # midnight, everything
        rng = date_range("1/1/2000", "1/31/2000")
        ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
        ts = tm.get_obj(ts, frame_or_series)

        result = ts.at_time(time(0, 0))
        tm.assert_equal(result, ts)

    def test_at_time_nonexistent(self, frame_or_series):
        # time doesn't exist
        rng = date_range("1/1/2012", freq="23Min", periods=384)
        ts = DataFrame(np.random.randn(len(rng)), rng)
        ts = tm.get_obj(ts, frame_or_series)
        rs = ts.at_time("16:00")
        assert len(rs) == 0

    @pytest.mark.parametrize(
        "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)]
    )
    def test_at_time_errors(self, hour):
        # GH#24043
        dti = date_range("2018", periods=3, freq="H")
        df = DataFrame(list(range(len(dti))), index=dti)
        if getattr(hour, "tzinfo", None) is None:
            result = df.at_time(hour)
            expected = df.iloc[1:2]
            tm.assert_frame_equal(result, expected)
        else:
            with pytest.raises(ValueError, match="Index must be timezone"):
                df.at_time(hour)

    def test_at_time_tz(self):
        # GH#24043
        dti = date_range("2018", periods=3, freq="H", tz="US/Pacific")
        df = DataFrame(list(range(len(dti))), index=dti)
        result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern")))
        expected = df.iloc[1:2]
        tm.assert_frame_equal(result, expected)

    def test_at_time_raises(self, frame_or_series):
        # GH#20725
        obj = DataFrame([[1, 2, 3], [4, 5, 6]])
        obj = tm.get_obj(obj, frame_or_series)
        msg = "Index must be DatetimeIndex"
        with pytest.raises(TypeError, match=msg):  # index is not a DatetimeIndex
            obj.at_time("00:00")
@pytest.mark.parametrize("axis", ["index", "columns", 0, 1])
|
||||
def test_at_time_axis(self, axis):
|
||||
# issue 8839
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), len(rng)))
|
||||
ts.index, ts.columns = rng, rng
|
||||
|
||||
indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)]
|
||||
|
||||
if axis in ["index", 0]:
|
||||
expected = ts.loc[indices, :]
|
||||
elif axis in ["columns", 1]:
|
||||
expected = ts.loc[:, indices]
|
||||
|
||||
result = ts.at_time("9:30", axis=axis)
|
||||
|
||||
# Without clearing freq, result has freq 1440T and expected 5T
|
||||
result.index = result.index._with_freq(None)
|
||||
expected.index = expected.index._with_freq(None)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_at_time_datetimeindex(self):
|
||||
index = date_range("2012-01-01", "2012-01-05", freq="30min")
|
||||
df = DataFrame(np.random.randn(len(index), 5), index=index)
|
||||
akey = time(12, 0, 0)
|
||||
ainds = [24, 72, 120, 168]
|
||||
|
||||
result = df.at_time(akey)
|
||||
expected = df.loc[akey]
|
||||
expected2 = df.iloc[ainds]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result, expected2)
|
||||
assert len(result) == 4
|
||||
289
dist/client/pandas/tests/frame/methods/test_between_time.py
vendored
Normal file
@@ -0,0 +1,289 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import timezones
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestBetweenTime:
|
||||
@td.skip_if_has_locale
|
||||
def test_between_time_formats(self, frame_or_series):
|
||||
# GH#11818
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
||||
ts = tm.get_obj(ts, frame_or_series)
|
||||
|
||||
strings = [
|
||||
("2:00", "2:30"),
|
||||
("0200", "0230"),
|
||||
("2:00am", "2:30am"),
|
||||
("0200am", "0230am"),
|
||||
("2:00:00", "2:30:00"),
|
||||
("020000", "023000"),
|
||||
("2:00:00am", "2:30:00am"),
|
||||
("020000am", "023000am"),
|
||||
]
|
||||
expected_length = 28
|
||||
|
||||
for time_string in strings:
|
||||
assert len(ts.between_time(*time_string)) == expected_length
|
||||
|
||||
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
|
||||
def test_localized_between_time(self, tzstr, frame_or_series):
|
||||
tz = timezones.maybe_get_tz(tzstr)
|
||||
|
||||
rng = date_range("4/16/2012", "5/1/2012", freq="H")
|
||||
ts = Series(np.random.randn(len(rng)), index=rng)
|
||||
if frame_or_series is DataFrame:
|
||||
ts = ts.to_frame()
|
||||
|
||||
ts_local = ts.tz_localize(tzstr)
|
||||
|
||||
t1, t2 = time(10, 0), time(11, 0)
|
||||
result = ts_local.between_time(t1, t2)
|
||||
expected = ts.between_time(t1, t2).tz_localize(tzstr)
|
||||
tm.assert_equal(result, expected)
|
||||
assert timezones.tz_compare(result.index.tz, tz)
|
||||
|
||||
def test_between_time_types(self, frame_or_series):
|
||||
# GH11818
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
obj = DataFrame({"A": 0}, index=rng)
|
||||
obj = tm.get_obj(obj, frame_or_series)
|
||||
|
||||
msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
obj.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
|
||||
|
||||
def test_between_time(self, inclusive_endpoints_fixture, frame_or_series):
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
||||
ts = tm.get_obj(ts, frame_or_series)
|
||||
|
||||
stime = time(0, 0)
|
||||
etime = time(1, 0)
|
||||
inclusive = inclusive_endpoints_fixture
|
||||
|
||||
filtered = ts.between_time(stime, etime, inclusive=inclusive)
|
||||
exp_len = 13 * 4 + 1
|
||||
|
||||
if inclusive in ["right", "neither"]:
|
||||
exp_len -= 5
|
||||
if inclusive in ["left", "neither"]:
|
||||
exp_len -= 4
|
||||
|
||||
assert len(filtered) == exp_len
|
||||
for rs in filtered.index:
|
||||
t = rs.time()
|
||||
if inclusive in ["left", "both"]:
|
||||
assert t >= stime
|
||||
else:
|
||||
assert t > stime
|
||||
|
||||
if inclusive in ["right", "both"]:
|
||||
assert t <= etime
|
||||
else:
|
||||
assert t < etime
|
||||
|
||||
result = ts.between_time("00:00", "01:00")
|
||||
expected = ts.between_time(stime, etime)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# across midnight
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
||||
ts = tm.get_obj(ts, frame_or_series)
|
||||
stime = time(22, 0)
|
||||
etime = time(9, 0)
|
||||
|
||||
filtered = ts.between_time(stime, etime, inclusive=inclusive)
|
||||
exp_len = (12 * 11 + 1) * 4 + 1
|
||||
if inclusive in ["right", "neither"]:
|
||||
exp_len -= 4
|
||||
if inclusive in ["left", "neither"]:
|
||||
exp_len -= 4
|
||||
|
||||
assert len(filtered) == exp_len
|
||||
for rs in filtered.index:
|
||||
t = rs.time()
|
||||
if inclusive in ["left", "both"]:
|
||||
assert (t >= stime) or (t <= etime)
|
||||
else:
|
||||
assert (t > stime) or (t <= etime)
|
||||
|
||||
if inclusive in ["right", "both"]:
|
||||
assert (t <= etime) or (t >= stime)
|
||||
else:
|
||||
assert (t < etime) or (t >= stime)
|
||||
|
||||
def test_between_time_raises(self, frame_or_series):
|
||||
# GH#20725
|
||||
obj = DataFrame([[1, 2, 3], [4, 5, 6]])
|
||||
obj = tm.get_obj(obj, frame_or_series)
|
||||
|
||||
msg = "Index must be DatetimeIndex"
|
||||
with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex
|
||||
obj.between_time(start_time="00:00", end_time="12:00")
|
||||
|
||||
def test_between_time_axis(self, frame_or_series):
|
||||
# GH#8839
|
||||
rng = date_range("1/1/2000", periods=100, freq="10min")
|
||||
ts = Series(np.random.randn(len(rng)), index=rng)
|
||||
if frame_or_series is DataFrame:
|
||||
ts = ts.to_frame()
|
||||
|
||||
stime, etime = ("08:00:00", "09:00:00")
|
||||
expected_length = 7
|
||||
|
||||
assert len(ts.between_time(stime, etime)) == expected_length
|
||||
assert len(ts.between_time(stime, etime, axis=0)) == expected_length
|
||||
msg = f"No axis named {ts.ndim} for object type {type(ts).__name__}"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ts.between_time(stime, etime, axis=ts.ndim)
|
||||
|
||||
def test_between_time_axis_aliases(self, axis):
|
||||
# GH#8839
|
||||
rng = date_range("1/1/2000", periods=100, freq="10min")
|
||||
ts = DataFrame(np.random.randn(len(rng), len(rng)))
|
||||
stime, etime = ("08:00:00", "09:00:00")
|
||||
exp_len = 7
|
||||
|
||||
if axis in ["index", 0]:
|
||||
ts.index = rng
|
||||
assert len(ts.between_time(stime, etime)) == exp_len
|
||||
assert len(ts.between_time(stime, etime, axis=0)) == exp_len
|
||||
|
||||
if axis in ["columns", 1]:
|
||||
ts.columns = rng
|
||||
selected = ts.between_time(stime, etime, axis=1).columns
|
||||
assert len(selected) == exp_len
|
||||
|
||||
def test_between_time_axis_raises(self, axis):
|
||||
# issue 8839
|
||||
rng = date_range("1/1/2000", periods=100, freq="10min")
|
||||
mask = np.arange(0, len(rng))
|
||||
rand_data = np.random.randn(len(rng), len(rng))
|
||||
ts = DataFrame(rand_data, index=rng, columns=rng)
|
||||
stime, etime = ("08:00:00", "09:00:00")
|
||||
|
||||
msg = "Index must be DatetimeIndex"
|
||||
if axis in ["columns", 1]:
|
||||
ts.index = mask
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ts.between_time(stime, etime)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ts.between_time(stime, etime, axis=0)
|
||||
|
||||
if axis in ["index", 0]:
|
||||
ts.columns = mask
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ts.between_time(stime, etime, axis=1)
|
||||
|
||||
def test_between_time_datetimeindex(self):
|
||||
index = date_range("2012-01-01", "2012-01-05", freq="30min")
|
||||
df = DataFrame(np.random.randn(len(index), 5), index=index)
|
||||
bkey = slice(time(13, 0, 0), time(14, 0, 0))
|
||||
binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
|
||||
|
||||
result = df.between_time(bkey.start, bkey.stop)
|
||||
expected = df.loc[bkey]
|
||||
expected2 = df.iloc[binds]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result, expected2)
|
||||
assert len(result) == 12
|
||||
|
||||
@pytest.mark.parametrize("include_start", [True, False])
|
||||
@pytest.mark.parametrize("include_end", [True, False])
|
||||
def test_between_time_warn(self, include_start, include_end, frame_or_series):
|
||||
# GH40245
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
||||
ts = tm.get_obj(ts, frame_or_series)
|
||||
|
||||
stime = time(0, 0)
|
||||
etime = time(1, 0)
|
||||
|
||||
match = (
|
||||
"`include_start` and `include_end` "
|
||||
"are deprecated in favour of `inclusive`."
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
_ = ts.between_time(stime, etime, include_start, include_end)
|
||||
|
||||
def test_between_time_incorr_arg_inclusive(self):
|
||||
# GH40245
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
||||
|
||||
stime = time(0, 0)
|
||||
etime = time(1, 0)
|
||||
inclusive = "bad_string"
|
||||
msg = "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ts.between_time(stime, etime, inclusive=inclusive)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"include_start, include_end", [(True, None), (True, True), (None, True)]
|
||||
)
|
||||
def test_between_time_incompatiable_args_given(self, include_start, include_end):
|
||||
# GH40245
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
||||
|
||||
stime = time(0, 0)
|
||||
etime = time(1, 0)
|
||||
msg = (
|
||||
"Deprecated arguments `include_start` and `include_end` cannot be "
|
||||
"passed if `inclusive` has been given."
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ts.between_time(stime, etime, include_start, include_end, inclusive="left")
|
||||
|
||||
def test_between_time_same_functionality_old_and_new_args(self):
|
||||
# GH40245
|
||||
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
|
||||
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
|
||||
stime = time(0, 0)
|
||||
etime = time(1, 0)
|
||||
match = (
|
||||
"`include_start` and `include_end` "
|
||||
"are deprecated in favour of `inclusive`."
|
||||
)
|
||||
|
||||
result = ts.between_time(stime, etime)
|
||||
expected = ts.between_time(stime, etime, inclusive="both")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ts.between_time(stime, etime, include_start=False)
|
||||
expected = ts.between_time(stime, etime, inclusive="right")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ts.between_time(stime, etime, include_end=False)
|
||||
expected = ts.between_time(stime, etime, inclusive="left")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ts.between_time(
|
||||
stime, etime, include_start=False, include_end=False
|
||||
)
|
||||
expected = ts.between_time(stime, etime, inclusive="neither")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ts.between_time(stime, etime, include_start=True, include_end=True)
|
||||
expected = ts.between_time(stime, etime, inclusive="both")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
180
dist/client/pandas/tests/frame/methods/test_clip.py
vendored
Normal file
@@ -0,0 +1,180 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameClip:
|
||||
def test_clip(self, float_frame):
|
||||
median = float_frame.median().median()
|
||||
original = float_frame.copy()
|
||||
|
||||
double = float_frame.clip(upper=median, lower=median)
|
||||
assert not (double.values != median).any()
|
||||
|
||||
# Verify that float_frame was not changed inplace
|
||||
assert (float_frame.values == original.values).all()
|
||||
|
||||
def test_inplace_clip(self, float_frame):
|
||||
# GH#15388
|
||||
median = float_frame.median().median()
|
||||
frame_copy = float_frame.copy()
|
||||
|
||||
return_value = frame_copy.clip(upper=median, lower=median, inplace=True)
|
||||
assert return_value is None
|
||||
assert not (frame_copy.values != median).any()
|
||||
|
||||
def test_dataframe_clip(self):
|
||||
# GH#2747
|
||||
df = DataFrame(np.random.randn(1000, 2))
|
||||
|
||||
for lb, ub in [(-1, 1), (1, -1)]:
|
||||
clipped_df = df.clip(lb, ub)
|
||||
|
||||
lb, ub = min(lb, ub), max(ub, lb)
|
||||
lb_mask = df.values <= lb
|
||||
ub_mask = df.values >= ub
|
||||
mask = ~lb_mask & ~ub_mask
|
||||
assert (clipped_df.values[lb_mask] == lb).all()
|
||||
assert (clipped_df.values[ub_mask] == ub).all()
|
||||
assert (clipped_df.values[mask] == df.values[mask]).all()
|
||||
|
||||
def test_clip_mixed_numeric(self):
|
||||
# clip on mixed integer or floats
|
||||
# GH#24162, clipping now preserves numeric types per column
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]})
|
||||
result = df.clip(1, 2)
|
||||
expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"])
|
||||
expected = df.dtypes
|
||||
result = df.clip(upper=3).dtypes
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
|
||||
def test_clip_against_series(self, inplace):
|
||||
# GH#6966
|
||||
|
||||
df = DataFrame(np.random.randn(1000, 2))
|
||||
lb = Series(np.random.randn(1000))
|
||||
ub = lb + 1
|
||||
|
||||
original = df.copy()
|
||||
clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)
|
||||
|
||||
if inplace:
|
||||
clipped_df = df
|
||||
|
||||
for i in range(2):
|
||||
lb_mask = original.iloc[:, i] <= lb
|
||||
ub_mask = original.iloc[:, i] >= ub
|
||||
mask = ~lb_mask & ~ub_mask
|
||||
|
||||
result = clipped_df.loc[lb_mask, i]
|
||||
tm.assert_series_equal(result, lb[lb_mask], check_names=False)
|
||||
assert result.name == i
|
||||
|
||||
result = clipped_df.loc[ub_mask, i]
|
||||
tm.assert_series_equal(result, ub[ub_mask], check_names=False)
|
||||
assert result.name == i
|
||||
|
||||
tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
|
||||
@pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
|
||||
@pytest.mark.parametrize(
|
||||
"axis,res",
|
||||
[
|
||||
(0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]),
|
||||
(1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]),
|
||||
],
|
||||
)
|
||||
def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res):
|
||||
# GH#15390
|
||||
original = simple_frame.copy(deep=True)
|
||||
|
||||
result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace)
|
||||
|
||||
expected = DataFrame(res, columns=original.columns, index=original.index)
|
||||
if inplace:
|
||||
result = original
|
||||
tm.assert_frame_equal(result, expected, check_exact=True)
|
||||
|
||||
@pytest.mark.parametrize("axis", [0, 1, None])
|
||||
def test_clip_against_frame(self, axis):
|
||||
df = DataFrame(np.random.randn(1000, 2))
|
||||
lb = DataFrame(np.random.randn(1000, 2))
|
||||
ub = lb + 1
|
||||
|
||||
clipped_df = df.clip(lb, ub, axis=axis)
|
||||
|
||||
lb_mask = df <= lb
|
||||
ub_mask = df >= ub
|
||||
mask = ~lb_mask & ~ub_mask
|
||||
|
||||
tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
|
||||
tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
|
||||
tm.assert_frame_equal(clipped_df[mask], df[mask])
|
||||
|
||||
def test_clip_against_unordered_columns(self):
|
||||
# GH#20911
|
||||
df1 = DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"])
|
||||
df2 = DataFrame(np.random.randn(1000, 4), columns=["D", "A", "B", "C"])
|
||||
df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"])
|
||||
result_upper = df1.clip(lower=0, upper=df2)
|
||||
expected_upper = df1.clip(lower=0, upper=df2[df1.columns])
|
||||
result_lower = df1.clip(lower=df3, upper=3)
|
||||
expected_lower = df1.clip(lower=df3[df1.columns], upper=3)
|
||||
result_lower_upper = df1.clip(lower=df3, upper=df2)
|
||||
expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns])
|
||||
tm.assert_frame_equal(result_upper, expected_upper)
|
||||
tm.assert_frame_equal(result_lower, expected_lower)
|
||||
tm.assert_frame_equal(result_lower_upper, expected_lower_upper)
|
||||
|
||||
def test_clip_with_na_args(self, float_frame, using_array_manager):
|
||||
"""Should process np.nan argument as None"""
|
||||
# GH#17276
|
||||
tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
|
||||
tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame)
|
||||
|
||||
# GH#19992 and adjusted in GH#40420
|
||||
df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})
|
||||
|
||||
result = df.clip(lower=[4, 5, np.nan], axis=0)
|
||||
expected = DataFrame(
|
||||
{"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
warn = FutureWarning if using_array_manager else None
|
||||
with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
|
||||
result = df.clip(lower=[4, 5, np.nan], axis=1)
|
||||
expected = DataFrame(
|
||||
{"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH#40420
|
||||
data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
|
||||
df = DataFrame(data)
|
||||
t = Series([2, -4, np.NaN, 6, 3])
|
||||
result = df.clip(lower=t, axis=0)
|
||||
expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_clip_pos_args_deprecation(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/41485
|
||||
df = DataFrame({"a": [1, 2, 3]})
|
||||
msg = (
|
||||
r"In a future version of pandas all arguments of DataFrame.clip except "
|
||||
r"for the arguments 'lower' and 'upper' will be keyword-only"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df.clip(0, 1, 0)
|
||||
expected = DataFrame({"a": [1, 1, 1]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
47
dist/client/pandas/tests/frame/methods/test_combine.py
vendored
Normal file
@@ -0,0 +1,47 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


class TestCombine:
    @pytest.mark.parametrize(
        "data",
        [
            pd.date_range("2000", periods=4),
            pd.date_range("2000", periods=4, tz="US/Central"),
            pd.period_range("2000", periods=4),
            pd.timedelta_range(0, periods=4),
        ],
    )
    def test_combine_datetlike_udf(self, data):
        # GH#23079
        df = pd.DataFrame({"A": data})
        other = df.copy()
        df.iloc[1, 0] = None

        def combiner(a, b):
            return b

        result = df.combine(other, combiner)
        tm.assert_frame_equal(result, other)

    def test_combine_generic(self, float_frame):
        df1 = float_frame
        df2 = float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]]

        combined = df1.combine(df2, np.add)
        combined2 = df2.combine(df1, np.add)
        assert combined["D"].isna().all()
        assert combined2["D"].isna().all()

        chunk = combined.loc[combined.index[:-5], ["A", "B", "C"]]
        chunk2 = combined2.loc[combined2.index[:-5], ["A", "B", "C"]]

        exp = (
            float_frame.loc[float_frame.index[:-5], ["A", "B", "C"]].reindex_like(chunk)
            * 2
        )
        tm.assert_frame_equal(chunk, exp)
        tm.assert_frame_equal(chunk2, exp)
528
dist/client/pandas/tests/frame/methods/test_combine_first.py
vendored
Normal file
@@ -0,0 +1,528 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.cast import (
|
||||
find_common_type,
|
||||
is_dtype_equal,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameCombineFirst:
|
||||
def test_combine_first_mixed(self):
|
||||
a = Series(["a", "b"], index=range(2))
|
||||
b = Series(range(2), index=range(2))
|
||||
f = DataFrame({"A": a, "B": b})
|
||||
|
||||
a = Series(["a", "b"], index=range(5, 7))
|
||||
b = Series(range(2), index=range(5, 7))
|
||||
g = DataFrame({"A": a, "B": b})
|
||||
|
||||
exp = DataFrame({"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6])
|
||||
combined = f.combine_first(g)
|
||||
tm.assert_frame_equal(combined, exp)
|
||||
|
||||
def test_combine_first(self, float_frame):
|
||||
# disjoint
|
||||
head, tail = float_frame[:5], float_frame[5:]
|
||||
|
||||
combined = head.combine_first(tail)
|
||||
reordered_frame = float_frame.reindex(combined.index)
|
||||
tm.assert_frame_equal(combined, reordered_frame)
|
||||
assert tm.equalContents(combined.columns, float_frame.columns)
|
||||
tm.assert_series_equal(combined["A"], reordered_frame["A"])
|
||||
|
||||
# same index
|
||||
fcopy = float_frame.copy()
|
||||
fcopy["A"] = 1
|
||||
del fcopy["C"]
|
||||
|
||||
fcopy2 = float_frame.copy()
|
||||
fcopy2["B"] = 0
|
||||
del fcopy2["D"]
|
||||
|
||||
combined = fcopy.combine_first(fcopy2)
|
||||
|
||||
assert (combined["A"] == 1).all()
|
||||
tm.assert_series_equal(combined["B"], fcopy["B"])
|
||||
tm.assert_series_equal(combined["C"], fcopy2["C"])
|
||||
tm.assert_series_equal(combined["D"], fcopy["D"])
|
||||
|
||||
# overlap
|
||||
head, tail = reordered_frame[:10].copy(), reordered_frame
|
||||
head["A"] = 1
|
||||
|
||||
combined = head.combine_first(tail)
|
||||
assert (combined["A"][:10] == 1).all()
|
||||
|
||||
# reverse overlap
|
||||
tail["A"][:10] = 0
|
||||
combined = tail.combine_first(head)
|
||||
assert (combined["A"][:10] == 0).all()
|
||||
|
||||
# no overlap
|
||||
f = float_frame[:10]
|
||||
g = float_frame[10:]
|
||||
combined = f.combine_first(g)
|
||||
tm.assert_series_equal(combined["A"].reindex(f.index), f["A"])
|
||||
tm.assert_series_equal(combined["A"].reindex(g.index), g["A"])
|
||||
|
||||
# corner cases
|
||||
comb = float_frame.combine_first(DataFrame())
|
||||
tm.assert_frame_equal(comb, float_frame)
|
||||
|
||||
comb = DataFrame().combine_first(float_frame)
|
||||
tm.assert_frame_equal(comb, float_frame)
|
||||
|
||||
comb = float_frame.combine_first(DataFrame(index=["faz", "boo"]))
|
||||
assert "faz" in comb.index
|
||||
|
||||
# #2525
|
||||
df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)])
|
||||
df2 = DataFrame(columns=["b"])
|
||||
result = df.combine_first(df2)
|
||||
assert "b" in result
|
||||
|
||||
def test_combine_first_mixed_bug(self):
|
||||
idx = Index(["a", "b", "c", "e"])
|
||||
ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx)
|
||||
ser2 = Series(["a", "b", "c", "e"], index=idx)
|
||||
ser3 = Series([12, 4, 5, 97], index=idx)
|
||||
|
||||
frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3})
|
||||
|
||||
idx = Index(["a", "b", "c", "f"])
|
||||
ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx)
|
||||
ser2 = Series(["a", "b", "c", "f"], index=idx)
|
||||
ser3 = Series([12, 4, 5, 97], index=idx)
|
||||
|
||||
frame2 = DataFrame({"col1": ser1, "col2": ser2, "col5": ser3})
|
||||
|
||||
combined = frame1.combine_first(frame2)
|
||||
assert len(combined.columns) == 5
|
||||
|
||||
def test_combine_first_same_as_in_update(self):
|
||||
# gh 3016 (same as in update)
|
||||
df = DataFrame(
|
||||
[[1.0, 2.0, False, True], [4.0, 5.0, True, False]],
|
||||
columns=["A", "B", "bool1", "bool2"],
|
||||
)
|
||||
|
||||
other = DataFrame([[45, 45]], index=[0], columns=["A", "B"])
|
||||
result = df.combine_first(other)
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
df.loc[0, "A"] = np.nan
|
||||
result = df.combine_first(other)
|
||||
df.loc[0, "A"] = 45
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
def test_combine_first_doc_example(self):
|
||||
# doc example
|
||||
df1 = DataFrame(
|
||||
{"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]}
|
||||
)
|
||||
|
||||
df2 = DataFrame(
|
||||
{
|
||||
"A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0],
|
||||
"B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0],
|
||||
}
|
||||
)
|
||||
|
||||
result = df1.combine_first(df2)
|
||||
expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_combine_first_return_obj_type_with_bools(self):
|
||||
# GH3552
|
||||
|
||||
df1 = DataFrame(
|
||||
[[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]]
|
||||
)
|
||||
df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2])
|
||||
|
||||
expected = Series([True, True, False], name=2, dtype=bool)
|
||||
|
||||
result_12 = df1.combine_first(df2)[2]
|
||||
tm.assert_series_equal(result_12, expected)
|
||||
|
||||
result_21 = df2.combine_first(df1)[2]
|
||||
tm.assert_series_equal(result_21, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data1, data2, data_expected",
|
||||
(
|
||||
(
|
||||
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
[pd.NaT, pd.NaT, pd.NaT],
|
||||
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
),
|
||||
(
|
||||
[pd.NaT, pd.NaT, pd.NaT],
|
||||
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
),
|
||||
(
|
||||
[datetime(2000, 1, 2), pd.NaT, pd.NaT],
|
||||
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
[datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
),
|
||||
(
|
||||
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
[datetime(2000, 1, 2), pd.NaT, pd.NaT],
|
||||
[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
|
||||
),
|
||||
),
|
||||
)
|
||||
def test_combine_first_convert_datatime_correctly(
|
||||
self, data1, data2, data_expected
|
||||
):
|
||||
# GH 3593
|
||||
|
||||
df1, df2 = DataFrame({"a": data1}), DataFrame({"a": data2})
|
||||
result = df1.combine_first(df2)
|
||||
expected = DataFrame({"a": data_expected})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_combine_first_align_nan(self):
|
||||
# GH 7509 (not fixed)
|
||||
dfa = DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"])
|
||||
dfb = DataFrame([[4], [5]], columns=["b"])
|
||||
assert dfa["a"].dtype == "datetime64[ns]"
|
||||
assert dfa["b"].dtype == "int64"
|
||||
|
||||
res = dfa.combine_first(dfb)
|
||||
exp = DataFrame(
|
||||
{"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2, 5]},
|
||||
columns=["a", "b"],
|
||||
)
|
||||
tm.assert_frame_equal(res, exp)
|
||||
assert res["a"].dtype == "datetime64[ns]"
|
||||
# TODO: this must be int64
|
||||
assert res["b"].dtype == "int64"
|
||||
|
||||
res = dfa.iloc[:0].combine_first(dfb)
|
||||
exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
# TODO: this must be datetime64
|
||||
assert res["a"].dtype == "float64"
|
||||
# TODO: this must be int64
|
||||
assert res["b"].dtype == "int64"
|
||||
|
||||
def test_combine_first_timezone(self):
|
||||
# see gh-7630
|
||||
data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC")
|
||||
df1 = DataFrame(
|
||||
columns=["UTCdatetime", "abc"],
|
||||
data=data1,
|
||||
index=pd.date_range("20140627", periods=1),
|
||||
)
|
||||
data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC")
|
||||
df2 = DataFrame(
|
||||
columns=["UTCdatetime", "xyz"],
|
||||
data=data2,
|
||||
index=pd.date_range("20140628", periods=1),
|
||||
)
|
||||
res = df2[["UTCdatetime"]].combine_first(df1)
|
||||
exp = DataFrame(
|
||||
{
|
||||
"UTCdatetime": [
|
||||
pd.Timestamp("2010-01-01 01:01", tz="UTC"),
|
||||
pd.Timestamp("2012-12-12 12:12", tz="UTC"),
|
||||
],
|
||||
"abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT],
|
||||
},
|
||||
columns=["UTCdatetime", "abc"],
|
||||
index=pd.date_range("20140627", periods=2, freq="D"),
|
||||
)
|
||||
assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]"
|
||||
assert res["abc"].dtype == "datetime64[ns, UTC]"
|
||||
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
# see gh-10567
|
||||
dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC")
|
||||
df1 = DataFrame({"DATE": dts1})
|
||||
dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC")
|
||||
df2 = DataFrame({"DATE": dts2})
|
||||
|
||||
res = df1.combine_first(df2)
|
||||
tm.assert_frame_equal(res, df1)
|
||||
assert res["DATE"].dtype == "datetime64[ns, UTC]"
|
||||
|
||||
dts1 = pd.DatetimeIndex(
|
||||
["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern"
|
||||
)
|
||||
df1 = DataFrame({"DATE": dts1}, index=[1, 3, 5, 7])
|
||||
dts2 = pd.DatetimeIndex(
|
||||
["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern"
|
||||
)
|
||||
df2 = DataFrame({"DATE": dts2}, index=[2, 4, 5])
|
||||
|
||||
res = df1.combine_first(df2)
|
||||
exp_dts = pd.DatetimeIndex(
|
||||
[
|
||||
"2011-01-01",
|
||||
"2012-01-01",
|
||||
"NaT",
|
||||
"2012-01-02",
|
||||
"2011-01-03",
|
||||
"2011-01-04",
|
||||
],
|
||||
tz="US/Eastern",
|
||||
)
|
||||
exp = DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
# different tz
|
||||
dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern")
|
||||
df1 = DataFrame({"DATE": dts1})
|
||||
dts2 = pd.date_range("2015-01-03", "2015-01-05")
|
||||
df2 = DataFrame({"DATE": dts2})
|
||||
|
||||
# if df1 doesn't have NaN, keep its dtype
|
||||
res = df1.combine_first(df2)
|
||||
tm.assert_frame_equal(res, df1)
|
||||
assert res["DATE"].dtype == "datetime64[ns, US/Eastern]"
|
||||
|
||||
dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern")
|
||||
df1 = DataFrame({"DATE": dts1})
|
||||
dts2 = pd.date_range("2015-01-01", "2015-01-03")
|
||||
df2 = DataFrame({"DATE": dts2})
|
||||
|
||||
res = df1.combine_first(df2)
|
||||
exp_dts = [
|
||||
pd.Timestamp("2015-01-01", tz="US/Eastern"),
|
||||
pd.Timestamp("2015-01-02", tz="US/Eastern"),
|
||||
pd.Timestamp("2015-01-03"),
|
||||
]
|
||||
exp = DataFrame({"DATE": exp_dts})
|
||||
tm.assert_frame_equal(res, exp)
|
||||
assert res["DATE"].dtype == "object"
|
||||
|
||||
def test_combine_first_timedelta(self):
|
||||
data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"])
|
||||
df1 = DataFrame({"TD": data1}, index=[1, 3, 5, 7])
|
||||
data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"])
|
||||
df2 = DataFrame({"TD": data2}, index=[2, 4, 5])
|
||||
|
||||
res = df1.combine_first(df2)
|
||||
exp_dts = pd.TimedeltaIndex(
|
||||
["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"]
|
||||
)
|
||||
exp = DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
assert res["TD"].dtype == "timedelta64[ns]"
|
||||
|
||||
def test_combine_first_period(self):
|
||||
data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M")
|
||||
df1 = DataFrame({"P": data1}, index=[1, 3, 5, 7])
|
||||
data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M")
|
||||
df2 = DataFrame({"P": data2}, index=[2, 4, 5])
|
||||
|
||||
res = df1.combine_first(df2)
|
||||
exp_dts = pd.PeriodIndex(
|
||||
["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M"
|
||||
)
|
||||
exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
assert res["P"].dtype == data1.dtype
|
||||
|
||||
# different freq
|
||||
dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D")
|
||||
df2 = DataFrame({"P": dts2}, index=[2, 4, 5])
|
||||
|
||||
res = df1.combine_first(df2)
|
||||
exp_dts = [
|
||||
pd.Period("2011-01", freq="M"),
|
||||
pd.Period("2012-01-01", freq="D"),
|
||||
pd.NaT,
|
||||
pd.Period("2012-01-02", freq="D"),
|
||||
pd.Period("2011-03", freq="M"),
|
||||
pd.Period("2011-04", freq="M"),
|
||||
]
|
||||
exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
assert res["P"].dtype == "object"
|
||||
|
||||
def test_combine_first_int(self):
|
||||
# GH14687 - integer series that do not align exactly
|
||||
|
||||
df1 = DataFrame({"a": [0, 1, 3, 5]}, dtype="int64")
|
||||
df2 = DataFrame({"a": [1, 4]}, dtype="int64")
|
||||
|
||||
result_12 = df1.combine_first(df2)
|
||||
expected_12 = DataFrame({"a": [0, 1, 3, 5]})
|
||||
tm.assert_frame_equal(result_12, expected_12)
|
||||
|
||||
result_21 = df2.combine_first(df1)
|
||||
expected_21 = DataFrame({"a": [1, 4, 3, 5]})
|
||||
tm.assert_frame_equal(result_21, expected_21)
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.0])
|
||||
def test_combine_first_with_asymmetric_other(self, val):
|
||||
# see gh-20699
|
||||
df1 = DataFrame({"isNum": [val]})
|
||||
df2 = DataFrame({"isBool": [True]})
|
||||
|
||||
res = df1.combine_first(df2)
|
||||
exp = DataFrame({"isBool": [True], "isNum": [val]})
|
||||
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
|
||||
# GH: 37519
|
||||
df = DataFrame(
|
||||
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
|
||||
)
|
||||
df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype)
|
||||
df.set_index(["a", "b"], inplace=True)
|
||||
df2.set_index(["a", "b"], inplace=True)
|
||||
result = df.combine_first(df2)
|
||||
expected = DataFrame(
|
||||
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
|
||||
).set_index(["a", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"scalar1, scalar2",
|
||||
[
|
||||
(datetime(2020, 1, 1), datetime(2020, 1, 2)),
|
||||
(pd.Period("2020-01-01", "D"), pd.Period("2020-01-02", "D")),
|
||||
(pd.Timedelta("89 days"), pd.Timedelta("60 min")),
|
||||
(pd.Interval(left=0, right=1), pd.Interval(left=2, right=3, closed="left")),
|
||||
],
|
||||
)
|
||||
def test_combine_first_timestamp_bug(scalar1, scalar2, nulls_fixture):
|
||||
# GH28481
|
||||
na_value = nulls_fixture
|
||||
|
||||
frame = DataFrame([[na_value, na_value]], columns=["a", "b"])
|
||||
other = DataFrame([[scalar1, scalar2]], columns=["b", "c"])
|
||||
|
||||
common_dtype = find_common_type([frame.dtypes["b"], other.dtypes["b"]])
|
||||
|
||||
if is_dtype_equal(common_dtype, "object") or frame.dtypes["b"] == other.dtypes["b"]:
|
||||
val = scalar1
|
||||
else:
|
||||
val = na_value
|
||||
|
||||
result = frame.combine_first(other)
|
||||
|
||||
expected = DataFrame([[na_value, val, scalar2]], columns=["a", "b", "c"])
|
||||
|
||||
expected["b"] = expected["b"].astype(common_dtype)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
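A quick sketch of the dtype decision encoded in the branch above, assuming the vendored pandas version behaves as the test expects: find_common_type falls back to object when a datetime-like dtype meets the float (all-NA) column, which is why the scalar is kept in that case. Example dtypes invented for illustration.

import numpy as np
from pandas.core.dtypes.cast import find_common_type

# mixing datetime64 with float64 (the all-NaN column) is expected to promote to object here
print(find_common_type([np.dtype("float64"), np.dtype("datetime64[ns]")]))

# identical dtypes are preserved
print(find_common_type([np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")]))
# expected: datetime64[ns]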
|
||||
|
||||
def test_combine_first_timestamp_bug_NaT():
|
||||
# GH28481
|
||||
frame = DataFrame([[pd.NaT, pd.NaT]], columns=["a", "b"])
|
||||
other = DataFrame(
|
||||
[[datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["b", "c"]
|
||||
)
|
||||
|
||||
result = frame.combine_first(other)
|
||||
expected = DataFrame(
|
||||
[[pd.NaT, datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["a", "b", "c"]
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_combine_first_with_nan_multiindex():
|
||||
# gh-36562
|
||||
|
||||
mi1 = MultiIndex.from_arrays(
|
||||
[["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"]
|
||||
)
|
||||
df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1)
|
||||
mi2 = MultiIndex.from_arrays(
|
||||
[["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"]
|
||||
)
|
||||
s = Series([1, 2, 3, 4, 5, 6], index=mi2)
|
||||
res = df.combine_first(DataFrame({"d": s}))
|
||||
mi_expected = MultiIndex.from_arrays(
|
||||
[
|
||||
["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan],
|
||||
[1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6],
|
||||
],
|
||||
names=["a", "b"],
|
||||
)
|
||||
expected = DataFrame(
|
||||
{
|
||||
"c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
|
||||
"d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
|
||||
},
|
||||
index=mi_expected,
|
||||
)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
|
||||
def test_combine_preserve_dtypes():
|
||||
# GH7509
|
||||
a_column = Series(["a", "b"], index=range(2))
|
||||
b_column = Series(range(2), index=range(2))
|
||||
df1 = DataFrame({"A": a_column, "B": b_column})
|
||||
|
||||
c_column = Series(["a", "b"], index=range(5, 7))
|
||||
b_column = Series(range(-1, 1), index=range(5, 7))
|
||||
df2 = DataFrame({"B": b_column, "C": c_column})
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"A": ["a", "b", np.nan, np.nan],
|
||||
"B": [0, 1, -1, 0],
|
||||
"C": [np.nan, np.nan, "a", "b"],
|
||||
},
|
||||
index=[0, 1, 5, 6],
|
||||
)
|
||||
combined = df1.combine_first(df2)
|
||||
tm.assert_frame_equal(combined, expected)
|
||||
|
||||
|
||||
def test_combine_first_duplicates_rows_for_nan_index_values():
|
||||
# GH39881
|
||||
df1 = DataFrame(
|
||||
{"x": [9, 10, 11]},
|
||||
index=MultiIndex.from_arrays([[1, 2, 3], [np.nan, 5, 6]], names=["a", "b"]),
|
||||
)
|
||||
|
||||
df2 = DataFrame(
|
||||
{"y": [12, 13, 14]},
|
||||
index=MultiIndex.from_arrays([[1, 2, 4], [np.nan, 5, 7]], names=["a", "b"]),
|
||||
)
|
||||
|
||||
expected = DataFrame(
|
||||
{
|
||||
"x": [9.0, 10.0, 11.0, np.nan],
|
||||
"y": [12.0, 13.0, np.nan, 14.0],
|
||||
},
|
||||
index=MultiIndex.from_arrays(
|
||||
[[1, 2, 3, 4], [np.nan, 5.0, 6.0, 7.0]], names=["a", "b"]
|
||||
),
|
||||
)
|
||||
combined = df1.combine_first(df2)
|
||||
tm.assert_frame_equal(combined, expected)
|
||||
|
||||
|
||||
def test_combine_first_int64_not_cast_to_float64():
|
||||
# GH 28613
|
||||
df_1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
|
||||
df_2 = DataFrame({"A": [1, 20, 30], "B": [40, 50, 60], "C": [12, 34, 65]})
|
||||
result = df_1.combine_first(df_2)
|
||||
expected = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [12, 34, 65]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
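A minimal sketch of the combine_first semantics the file above tests, with invented data: NaN cells in the calling frame are filled from other after aligning on both index and columns.

import pandas as pd

df1 = pd.DataFrame({"A": [None, 0], "B": [None, 4]})
df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]})

print(df1.combine_first(df2))
#      A    B
# 0  1.0  3.0
# 1  0.0  4.0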
182
dist/client/pandas/tests/frame/methods/test_compare.py
vendored
Normal file
@@ -0,0 +1,182 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
|
||||
def test_compare_axis(align_axis):
|
||||
# GH#30429
|
||||
df = pd.DataFrame(
|
||||
{"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
|
||||
columns=["col1", "col2", "col3"],
|
||||
)
|
||||
df2 = df.copy()
|
||||
df2.loc[0, "col1"] = "c"
|
||||
df2.loc[2, "col3"] = 4.0
|
||||
|
||||
result = df.compare(df2, align_axis=align_axis)
|
||||
|
||||
if align_axis in (1, "columns"):
|
||||
indices = pd.Index([0, 2])
|
||||
columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
|
||||
expected = pd.DataFrame(
|
||||
[["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]],
|
||||
index=indices,
|
||||
columns=columns,
|
||||
)
|
||||
else:
|
||||
indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
|
||||
columns = pd.Index(["col1", "col3"])
|
||||
expected = pd.DataFrame(
|
||||
[["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]],
|
||||
index=indices,
|
||||
columns=columns,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"keep_shape, keep_equal",
|
||||
[
|
||||
(True, False),
|
||||
(False, True),
|
||||
(True, True),
|
||||
# False, False case is already covered in test_compare_axis
|
||||
],
|
||||
)
|
||||
def test_compare_various_formats(keep_shape, keep_equal):
|
||||
df = pd.DataFrame(
|
||||
{"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
|
||||
columns=["col1", "col2", "col3"],
|
||||
)
|
||||
df2 = df.copy()
|
||||
df2.loc[0, "col1"] = "c"
|
||||
df2.loc[2, "col3"] = 4.0
|
||||
|
||||
result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal)
|
||||
|
||||
if keep_shape:
|
||||
indices = pd.Index([0, 1, 2])
|
||||
columns = pd.MultiIndex.from_product(
|
||||
[["col1", "col2", "col3"], ["self", "other"]]
|
||||
)
|
||||
if keep_equal:
|
||||
expected = pd.DataFrame(
|
||||
[
|
||||
["a", "c", 1.0, 1.0, 1.0, 1.0],
|
||||
["b", "b", 2.0, 2.0, 2.0, 2.0],
|
||||
["c", "c", np.nan, np.nan, 3.0, 4.0],
|
||||
],
|
||||
index=indices,
|
||||
columns=columns,
|
||||
)
|
||||
else:
|
||||
expected = pd.DataFrame(
|
||||
[
|
||||
["a", "c", np.nan, np.nan, np.nan, np.nan],
|
||||
[np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
|
||||
[np.nan, np.nan, np.nan, np.nan, 3.0, 4.0],
|
||||
],
|
||||
index=indices,
|
||||
columns=columns,
|
||||
)
|
||||
else:
|
||||
indices = pd.Index([0, 2])
|
||||
columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
|
||||
expected = pd.DataFrame(
|
||||
[["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
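A small sketch of the two flags parametrized above, with invented data: keep_shape retains every row and column of the original shape, and keep_equal shows matching values instead of masking them to NaN.

import pandas as pd

df = pd.DataFrame({"x": [1, 2]})
other = pd.DataFrame({"x": [1, 3]})

# both rows survive (keep_shape) and the equal pair (1, 1) is shown (keep_equal)
print(df.compare(other, keep_shape=True, keep_equal=True))
#     x
#  self other
# 0    1     1
# 1    2     3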
|
||||
|
||||
def test_compare_with_equal_nulls():
|
||||
# We want to make sure two NaNs are considered the same
|
||||
# and dropped where applicable
|
||||
df = pd.DataFrame(
|
||||
{"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
|
||||
columns=["col1", "col2", "col3"],
|
||||
)
|
||||
df2 = df.copy()
|
||||
df2.loc[0, "col1"] = "c"
|
||||
|
||||
result = df.compare(df2)
|
||||
indices = pd.Index([0])
|
||||
columns = pd.MultiIndex.from_product([["col1"], ["self", "other"]])
|
||||
expected = pd.DataFrame([["a", "c"]], index=indices, columns=columns)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_with_non_equal_nulls():
|
||||
# We want to make sure the relevant NaNs do not get dropped
|
||||
# even if the entire row or column is all NaN
|
||||
df = pd.DataFrame(
|
||||
{"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
|
||||
columns=["col1", "col2", "col3"],
|
||||
)
|
||||
df2 = df.copy()
|
||||
df2.loc[0, "col1"] = "c"
|
||||
df2.loc[2, "col3"] = np.nan
|
||||
|
||||
result = df.compare(df2)
|
||||
|
||||
indices = pd.Index([0, 2])
|
||||
columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
|
||||
expected = pd.DataFrame(
|
||||
[["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan]],
|
||||
index=indices,
|
||||
columns=columns,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("align_axis", [0, 1])
|
||||
def test_compare_multi_index(align_axis):
|
||||
df = pd.DataFrame(
|
||||
{"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}
|
||||
)
|
||||
df.columns = pd.MultiIndex.from_arrays([["a", "a", "b"], ["col1", "col2", "col3"]])
|
||||
df.index = pd.MultiIndex.from_arrays([["x", "x", "y"], [0, 1, 2]])
|
||||
|
||||
df2 = df.copy()
|
||||
df2.iloc[0, 0] = "c"
|
||||
df2.iloc[2, 2] = 4.0
|
||||
|
||||
result = df.compare(df2, align_axis=align_axis)
|
||||
|
||||
if align_axis == 0:
|
||||
indices = pd.MultiIndex.from_arrays(
|
||||
[["x", "x", "y", "y"], [0, 0, 2, 2], ["self", "other", "self", "other"]]
|
||||
)
|
||||
columns = pd.MultiIndex.from_arrays([["a", "b"], ["col1", "col3"]])
|
||||
data = [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]]
|
||||
else:
|
||||
indices = pd.MultiIndex.from_arrays([["x", "y"], [0, 2]])
|
||||
columns = pd.MultiIndex.from_arrays(
|
||||
[
|
||||
["a", "a", "b", "b"],
|
||||
["col1", "col1", "col3", "col3"],
|
||||
["self", "other", "self", "other"],
|
||||
]
|
||||
)
|
||||
data = [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]]
|
||||
|
||||
expected = pd.DataFrame(data=data, index=indices, columns=columns)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_unaligned_objects():
|
||||
# test DataFrames with different indices
|
||||
msg = "Can only compare identically-labeled DataFrame objects"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"])
|
||||
df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"])
|
||||
df1.compare(df2)
|
||||
|
||||
# test DataFrames with different shapes
|
||||
msg = "Can only compare identically-labeled DataFrame objects"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df1 = pd.DataFrame(np.ones((3, 3)))
|
||||
df2 = pd.DataFrame(np.zeros((2, 1)))
|
||||
df1.compare(df2)
|
||||
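To round off the compare tests, a sketch of what align_axis changes, using invented data: with the default (columns) the self/other pair becomes an extra column level, while align_axis=0 turns it into an extra index level.

import pandas as pd

df = pd.DataFrame({"col1": ["a", "b"]})
other = df.copy()
other.loc[0, "col1"] = "c"

print(df.compare(other))                # self/other as a second column level
print(df.compare(other, align_axis=0))  # self/other as a second index level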
59
dist/client/pandas/tests/frame/methods/test_convert.py
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestConvert:
|
||||
def test_convert_objects(self, float_string_frame):
|
||||
|
||||
oops = float_string_frame.T.T
|
||||
converted = oops._convert(datetime=True)
|
||||
tm.assert_frame_equal(converted, float_string_frame)
|
||||
assert converted["A"].dtype == np.float64
|
||||
|
||||
# force numeric conversion
|
||||
float_string_frame["H"] = "1."
|
||||
float_string_frame["I"] = "1"
|
||||
|
||||
# add in some items that will be nan
|
||||
length = len(float_string_frame)
|
||||
float_string_frame["J"] = "1."
|
||||
float_string_frame["K"] = "1"
|
||||
float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled"
|
||||
converted = float_string_frame._convert(datetime=True, numeric=True)
|
||||
assert converted["H"].dtype == "float64"
|
||||
assert converted["I"].dtype == "int64"
|
||||
assert converted["J"].dtype == "float64"
|
||||
assert converted["K"].dtype == "float64"
|
||||
assert len(converted["J"].dropna()) == length - 5
|
||||
assert len(converted["K"].dropna()) == length - 5
|
||||
|
||||
# via astype
|
||||
converted = float_string_frame.copy()
|
||||
converted["H"] = converted["H"].astype("float64")
|
||||
converted["I"] = converted["I"].astype("int64")
|
||||
assert converted["H"].dtype == "float64"
|
||||
assert converted["I"].dtype == "int64"
|
||||
|
||||
# via astype, but errors
|
||||
converted = float_string_frame.copy()
|
||||
with pytest.raises(ValueError, match="invalid literal"):
|
||||
converted["H"].astype("int32")
|
||||
|
||||
def test_convert_mixed_single_column(self):
|
||||
# GH#4119, not converting a mixed type (e.g. floats and object)
|
||||
# mixed in a single column
|
||||
df = DataFrame({"s": Series([1, "na", 3, 4])})
|
||||
result = df._convert(datetime=True, numeric=True)
|
||||
expected = DataFrame({"s": Series([1, np.nan, 3, 4])})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_convert_objects_no_conversion(self):
|
||||
mixed1 = DataFrame({"a": [1, 2, 3], "b": [4.0, 5, 6], "c": ["x", "y", "z"]})
|
||||
mixed2 = mixed1._convert(datetime=True)
|
||||
tm.assert_frame_equal(mixed1, mixed2)
|
||||
43
dist/client/pandas/tests/frame/methods/test_convert_dtypes.py
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestConvertDtypes:
|
||||
@pytest.mark.parametrize(
|
||||
"convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
|
||||
)
|
||||
def test_convert_dtypes(self, convert_integer, expected, string_storage):
|
||||
# Specific types are tested in tests/series/test_dtypes.py
|
||||
# Just check that it works for DataFrame here
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
|
||||
"b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
|
||||
}
|
||||
)
|
||||
with pd.option_context("string_storage", string_storage):
|
||||
result = df.convert_dtypes(True, True, convert_integer, False)
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"a": pd.Series([1, 2, 3], dtype=expected),
|
||||
"b": pd.Series(["x", "y", "z"], dtype=f"string[{string_storage}]"),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_convert_empty(self):
|
||||
# Empty DataFrame can pass convert_dtypes, see GH#40393
|
||||
empty_df = pd.DataFrame()
|
||||
tm.assert_frame_equal(empty_df, empty_df.convert_dtypes())
|
||||
|
||||
def test_convert_dtypes_retain_column_names(self):
|
||||
# GH#41435
|
||||
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
|
||||
df.columns.name = "cols"
|
||||
|
||||
result = df.convert_dtypes()
|
||||
tm.assert_index_equal(result.columns, df.columns)
|
||||
assert result.columns.name == "cols"
|
||||
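A minimal sketch of the conversion the file above checks, with invented columns: convert_dtypes upgrades NumPy dtypes to the nullable extension dtypes where it can.

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"], "c": [True, None]})

print(df.convert_dtypes().dtypes)
# a      Int64
# b     string
# c    boolean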
63
dist/client/pandas/tests/frame/methods/test_copy.py
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCopy:
|
||||
@pytest.mark.parametrize("attr", ["index", "columns"])
|
||||
def test_copy_index_name_checking(self, float_frame, attr):
|
||||
# don't want to be able to modify the index stored elsewhere after
|
||||
# making a copy
|
||||
ind = getattr(float_frame, attr)
|
||||
ind.name = None
|
||||
cp = float_frame.copy()
|
||||
getattr(cp, attr).name = "foo"
|
||||
assert getattr(float_frame, attr).name is None
|
||||
|
||||
def test_copy_cache(self):
|
||||
# GH#31784 _item_cache not cleared on copy causes incorrect reads after updates
|
||||
df = DataFrame({"a": [1]})
|
||||
|
||||
df["x"] = [0]
|
||||
df["a"]
|
||||
|
||||
df.copy()
|
||||
|
||||
df["a"].values[0] = -1
|
||||
|
||||
tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]}))
|
||||
|
||||
df["y"] = [0]
|
||||
|
||||
assert df["a"].values[0] == -1
|
||||
tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]}))
|
||||
|
||||
def test_copy(self, float_frame, float_string_frame):
|
||||
cop = float_frame.copy()
|
||||
cop["E"] = cop["A"]
|
||||
assert "E" not in float_frame
|
||||
|
||||
# copy objects
|
||||
copy = float_string_frame.copy()
|
||||
assert copy._mgr is not float_string_frame._mgr
|
||||
|
||||
@td.skip_array_manager_invalid_test
|
||||
def test_copy_consolidates(self):
|
||||
# GH#42477
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": np.random.randint(0, 100, size=55),
|
||||
"b": np.random.randint(0, 100, size=55),
|
||||
}
|
||||
)
|
||||
|
||||
for i in range(0, 10):
|
||||
df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55)
|
||||
|
||||
assert len(df._mgr.blocks) == 11
|
||||
result = df.copy()
|
||||
assert len(result._mgr.blocks) == 1
|
||||
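A short sketch of the default deep-copy behaviour the tests above rely on, using an invented frame: mutating the copy leaves the original untouched.

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
cp = df.copy()          # deep copy by default
cp.loc[0, "a"] = 99

print(df.loc[0, "a"])   # still 1; the original is unchanged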
39
dist/client/pandas/tests/frame/methods/test_count.py
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameCount:
|
||||
def test_count(self):
|
||||
# corner case
|
||||
frame = DataFrame()
|
||||
ct1 = frame.count(1)
|
||||
assert isinstance(ct1, Series)
|
||||
|
||||
ct2 = frame.count(0)
|
||||
assert isinstance(ct2, Series)
|
||||
|
||||
# GH#423
|
||||
df = DataFrame(index=range(10))
|
||||
result = df.count(1)
|
||||
expected = Series(0, index=df.index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
df = DataFrame(columns=range(10))
|
||||
result = df.count(0)
|
||||
expected = Series(0, index=df.columns)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
df = DataFrame()
|
||||
result = df.count()
|
||||
expected = Series(0, index=[])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_count_objects(self, float_string_frame):
|
||||
dm = DataFrame(float_string_frame._series)
|
||||
df = DataFrame(float_string_frame._series)
|
||||
|
||||
tm.assert_series_equal(dm.count(), df.count())
|
||||
tm.assert_series_equal(dm.count(1), df.count(1))
|
||||
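A quick sketch of count with invented data: it returns the number of non-NA cells per column by default, or per row with axis=1.

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1, np.nan, 3], "b": [4, 5, np.nan]})

print(df.count())        # a: 2, b: 2
print(df.count(axis=1))  # 2, 1, 1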
123
dist/client/pandas/tests/frame/methods/test_count_with_level_deprecated.py
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameCount:
|
||||
def test_count_multiindex(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
|
||||
frame = frame.copy()
|
||||
frame.index.names = ["a", "b"]
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = frame.count(level="b")
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
expected = frame.count(level=1)
|
||||
tm.assert_frame_equal(result, expected, check_names=False)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = frame.count(level="a")
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
expected = frame.count(level=0)
|
||||
tm.assert_frame_equal(result, expected, check_names=False)
|
||||
|
||||
msg = "Level x not found"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
frame.count(level="x")
|
||||
|
||||
def test_count_level_corner(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
|
||||
ser = frame["A"][:0]
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = ser.count(level=0)
|
||||
expected = Series(0, index=ser.index.levels[0], name="A")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
df = frame[:0]
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = df.count(level=0)
|
||||
expected = (
|
||||
DataFrame(
|
||||
index=ser.index.levels[0].set_names(["first"]), columns=df.columns
|
||||
)
|
||||
.fillna(0)
|
||||
.astype(np.int64)
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_count_index_with_nan(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/21824
|
||||
df = DataFrame(
|
||||
{
|
||||
"Person": ["John", "Myla", None, "John", "Myla"],
|
||||
"Age": [24.0, 5, 21.0, 33, 26],
|
||||
"Single": [False, True, True, True, False],
|
||||
}
|
||||
)
|
||||
|
||||
# count on row labels
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
res = df.set_index(["Person", "Single"]).count(level="Person")
|
||||
expected = DataFrame(
|
||||
index=Index(["John", "Myla"], name="Person"),
|
||||
columns=Index(["Age"]),
|
||||
data=[2, 2],
|
||||
)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
# count on column labels
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1)
|
||||
expected = DataFrame(
|
||||
columns=Index(["John", "Myla"], name="Person"),
|
||||
index=Index(["Age"]),
|
||||
data=[[2, 2]],
|
||||
)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
def test_count_level(
|
||||
self,
|
||||
multiindex_year_month_day_dataframe_random_data,
|
||||
multiindex_dataframe_random_data,
|
||||
):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
frame = multiindex_dataframe_random_data
|
||||
|
||||
def _check_counts(frame, axis=0):
|
||||
index = frame._get_axis(axis)
|
||||
for i in range(index.nlevels):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = frame.count(axis=axis, level=i)
|
||||
expected = frame.groupby(axis=axis, level=i).count()
|
||||
expected = expected.reindex_like(result).astype("i8")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
frame.iloc[1, [1, 2]] = np.nan
|
||||
frame.iloc[7, [0, 1]] = np.nan
|
||||
ymd.iloc[1, [1, 2]] = np.nan
|
||||
ymd.iloc[7, [0, 1]] = np.nan
|
||||
|
||||
_check_counts(frame)
|
||||
_check_counts(ymd)
|
||||
_check_counts(frame.T, axis=1)
|
||||
_check_counts(ymd.T, axis=1)
|
||||
|
||||
# can't call with level on regular DataFrame
|
||||
df = tm.makeTimeDataFrame()
|
||||
with pytest.raises(TypeError, match="hierarchical"):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df.count(level=0)
|
||||
|
||||
frame["D"] = "foo"
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = frame.count(level=0, numeric_only=True)
|
||||
tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp"))
|
||||
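The FutureWarnings asserted above come from the deprecated level= argument; the groupby form used inside _check_counts is the supported equivalent, sketched here with an invented MultiIndex frame.

import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_arrays([["x", "x", "y"], [1, 2, 3]], names=["a", "b"])
df = pd.DataFrame({"v": [1.0, np.nan, 3.0]}, index=idx)

# equivalent to the deprecated df.count(level="a")
print(df.groupby(level="a").count())
#    v
# a
# x  1
# y  1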
361
dist/client/pandas/tests/frame/methods/test_cov_corr.py
vendored
Normal file
@@ -0,0 +1,361 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameCov:
|
||||
def test_cov(self, float_frame, float_string_frame):
|
||||
# min_periods no NAs (corner case)
|
||||
expected = float_frame.cov()
|
||||
result = float_frame.cov(min_periods=len(float_frame))
|
||||
|
||||
tm.assert_frame_equal(expected, result)
|
||||
|
||||
result = float_frame.cov(min_periods=len(float_frame) + 1)
|
||||
assert isna(result.values).all()
|
||||
|
||||
# with NAs
|
||||
frame = float_frame.copy()
|
||||
frame["A"][:5] = np.nan
|
||||
frame["B"][5:10] = np.nan
|
||||
result = frame.cov(min_periods=len(frame) - 8)
|
||||
expected = frame.cov()
|
||||
expected.loc["A", "B"] = np.nan
|
||||
expected.loc["B", "A"] = np.nan
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# regular
|
||||
result = frame.cov()
|
||||
expected = frame["A"].cov(frame["C"])
|
||||
tm.assert_almost_equal(result["A"]["C"], expected)
|
||||
|
||||
# exclude non-numeric types
|
||||
result = float_string_frame.cov()
|
||||
expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Single column frame
|
||||
df = DataFrame(np.linspace(0.0, 1.0, 10))
|
||||
result = df.cov()
|
||||
expected = DataFrame(
|
||||
np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
df.loc[0] = np.nan
|
||||
result = df.cov()
|
||||
expected = DataFrame(
|
||||
np.cov(df.values[1:].T).reshape((1, 1)),
|
||||
index=df.columns,
|
||||
columns=df.columns,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3])
|
||||
def test_cov_ddof(self, test_ddof):
|
||||
# GH#34611
|
||||
np_array1 = np.random.rand(10)
|
||||
np_array2 = np.random.rand(10)
|
||||
df = DataFrame({0: np_array1, 1: np_array2})
|
||||
result = df.cov(ddof=test_ddof)
|
||||
expected_np = np.cov(np_array1, np_array2, ddof=test_ddof)
|
||||
expected = DataFrame(expected_np)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other_column", [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])]
|
||||
)
|
||||
def test_cov_nullable_integer(self, other_column):
|
||||
# https://github.com/pandas-dev/pandas/issues/33803
|
||||
data = DataFrame({"a": pd.array([1, 2, None]), "b": other_column})
|
||||
result = data.cov()
|
||||
arr = np.array([[0.5, 0.5], [0.5, 1.0]])
|
||||
expected = DataFrame(arr, columns=["a", "b"], index=["a", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestDataFrameCorr:
|
||||
# DataFrame.corr(), as opposed to DataFrame.corrwith
|
||||
|
||||
@pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"])
|
||||
@td.skip_if_no_scipy
|
||||
def test_corr_scipy_method(self, float_frame, method):
|
||||
float_frame["A"][:5] = np.nan
|
||||
float_frame["B"][5:10] = np.nan
|
||||
float_frame["A"][:10] = float_frame["A"][10:20]
|
||||
|
||||
correls = float_frame.corr(method=method)
|
||||
expected = float_frame["A"].corr(float_frame["C"], method=method)
|
||||
tm.assert_almost_equal(correls["A"]["C"], expected)
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def test_corr_non_numeric(self, float_string_frame):
|
||||
# exclude non-numeric types
|
||||
result = float_string_frame.corr()
|
||||
expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
@pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"])
|
||||
def test_corr_nooverlap(self, meth):
|
||||
# nothing in common
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": [1, 1.5, 1, np.nan, np.nan, np.nan],
|
||||
"B": [np.nan, np.nan, np.nan, 1, 1.5, 1],
|
||||
"C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
|
||||
}
|
||||
)
|
||||
rs = df.corr(meth)
|
||||
assert isna(rs.loc["A", "B"])
|
||||
assert isna(rs.loc["B", "A"])
|
||||
assert rs.loc["A", "A"] == 1
|
||||
assert rs.loc["B", "B"] == 1
|
||||
assert isna(rs.loc["C", "C"])
|
||||
|
||||
@pytest.mark.parametrize("meth", ["pearson", "spearman"])
|
||||
def test_corr_constant(self, meth):
|
||||
# constant --> all NA
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": [1, 1, 1, np.nan, np.nan, np.nan],
|
||||
"B": [np.nan, np.nan, np.nan, 1, 1, 1],
|
||||
}
|
||||
)
|
||||
rs = df.corr(meth)
|
||||
assert isna(rs.values).all()
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
@pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"])
|
||||
def test_corr_int_and_boolean(self, meth):
|
||||
# when dtypes of pandas series are different
|
||||
# then ndarray will have dtype=object,
|
||||
# so it needs to be properly handled
|
||||
df = DataFrame({"a": [True, False], "b": [1, 0]})
|
||||
|
||||
expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"])
|
||||
|
||||
with warnings.catch_warnings(record=True):
|
||||
warnings.simplefilter("ignore", RuntimeWarning)
|
||||
result = df.corr(meth)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("method", ["cov", "corr"])
|
||||
def test_corr_cov_independent_index_column(self, method):
|
||||
# GH#14617
|
||||
df = DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd"))
|
||||
result = getattr(df, method)()
|
||||
assert result.index is not result.columns
|
||||
assert result.index.equals(result.columns)
|
||||
|
||||
def test_corr_invalid_method(self):
|
||||
# GH#22298
|
||||
df = DataFrame(np.random.normal(size=(10, 2)))
|
||||
msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.corr(method="____")
|
||||
|
||||
def test_corr_int(self):
|
||||
# dtypes other than float64 GH#1761
|
||||
df = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]})
|
||||
|
||||
df.cov()
|
||||
df.corr()
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
@pytest.mark.parametrize(
|
||||
"nullable_column", [pd.array([1, 2, 3]), pd.array([1, 2, None])]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"other_column",
|
||||
[pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, np.nan])],
|
||||
)
|
||||
@pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"])
|
||||
def test_corr_nullable_integer(self, nullable_column, other_column, method):
|
||||
# https://github.com/pandas-dev/pandas/issues/33803
|
||||
data = DataFrame({"a": nullable_column, "b": other_column})
|
||||
result = data.corr(method=method)
|
||||
expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_corr_item_cache(self):
|
||||
# Check that corr does not lead to incorrect entries in item_cache
|
||||
|
||||
df = DataFrame({"A": range(10)})
|
||||
df["B"] = range(10)[::-1]
|
||||
|
||||
ser = df["A"] # populate item_cache
|
||||
assert len(df._mgr.arrays) == 2 # i.e. 2 blocks
|
||||
|
||||
_ = df.corr()
|
||||
|
||||
# Check that the corr didn't break the link between ser and df
|
||||
ser.values[0] = 99
|
||||
assert df.loc[0, "A"] == 99
|
||||
assert df["A"] is ser
|
||||
assert df.values[0, 0] == 99
|
||||
|
||||
@pytest.mark.parametrize("length", [2, 20, 200, 2000])
|
||||
def test_corr_for_constant_columns(self, length):
|
||||
# GH: 37448
|
||||
df = DataFrame(length * [[0.4, 0.1]], columns=["A", "B"])
|
||||
result = df.corr()
|
||||
expected = DataFrame(
|
||||
{"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"]
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_calc_corr_small_numbers(self):
|
||||
# GH: 37452
|
||||
df = DataFrame(
|
||||
{"A": [1.0e-20, 2.0e-20, 3.0e-20], "B": [1.0e-20, 2.0e-20, 3.0e-20]}
|
||||
)
|
||||
result = df.corr()
|
||||
expected = DataFrame({"A": [1.0, 1.0], "B": [1.0, 1.0]}, index=["A", "B"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
@pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"])
|
||||
def test_corr_min_periods_greater_than_length(self, method):
|
||||
df = DataFrame({"A": [1, 2], "B": [1, 2]})
|
||||
result = df.corr(method=method, min_periods=3)
|
||||
expected = DataFrame(
|
||||
{"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"]
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestDataFrameCorrWith:
|
||||
def test_corrwith(self, datetime_frame):
|
||||
a = datetime_frame
|
||||
noise = Series(np.random.randn(len(a)), index=a.index)
|
||||
|
||||
b = datetime_frame.add(noise, axis=0)
|
||||
|
||||
# make sure order does not matter
|
||||
b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:])
|
||||
del b["B"]
|
||||
|
||||
colcorr = a.corrwith(b, axis=0)
|
||||
tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"]))
|
||||
|
||||
rowcorr = a.corrwith(b, axis=1)
|
||||
tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0))
|
||||
|
||||
dropped = a.corrwith(b, axis=0, drop=True)
|
||||
tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"]))
|
||||
assert "B" not in dropped
|
||||
|
||||
dropped = a.corrwith(b, axis=1, drop=True)
|
||||
assert a.index[-1] not in dropped.index
|
||||
|
||||
# non time-series data
|
||||
index = ["a", "b", "c", "d", "e"]
|
||||
columns = ["one", "two", "three", "four"]
|
||||
df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns)
|
||||
df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns)
|
||||
correls = df1.corrwith(df2, axis=1)
|
||||
for row in index[:4]:
|
||||
tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
|
||||
|
||||
def test_corrwith_with_objects(self):
|
||||
df1 = tm.makeTimeDataFrame()
|
||||
df2 = tm.makeTimeDataFrame()
|
||||
cols = ["A", "B", "C", "D"]
|
||||
|
||||
df1["obj"] = "foo"
|
||||
df2["obj"] = "bar"
|
||||
|
||||
result = df1.corrwith(df2)
|
||||
expected = df1.loc[:, cols].corrwith(df2.loc[:, cols])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df1.corrwith(df2, axis=1)
|
||||
expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_corrwith_series(self, datetime_frame):
|
||||
result = datetime_frame.corrwith(datetime_frame["A"])
|
||||
expected = datetime_frame.apply(datetime_frame["A"].corr)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_corrwith_matches_corrcoef(self):
|
||||
df1 = DataFrame(np.arange(10000), columns=["a"])
|
||||
df2 = DataFrame(np.arange(10000) ** 2, columns=["a"])
|
||||
c1 = df1.corrwith(df2)["a"]
|
||||
c2 = np.corrcoef(df1["a"], df2["a"])[0][1]
|
||||
|
||||
tm.assert_almost_equal(c1, c2)
|
||||
assert c1 < 1
|
||||
|
||||
def test_corrwith_mixed_dtypes(self):
|
||||
# GH#18570
|
||||
df = DataFrame(
|
||||
{"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]}
|
||||
)
|
||||
s = Series([0, 6, 7, 3])
|
||||
result = df.corrwith(s)
|
||||
corrs = [df["a"].corr(s), df["b"].corr(s)]
|
||||
expected = Series(data=corrs, index=["a", "b"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_corrwith_index_intersection(self):
|
||||
df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
|
||||
df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
|
||||
|
||||
result = df1.corrwith(df2, drop=True).index.sort_values()
|
||||
expected = df1.columns.intersection(df2.columns).sort_values()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_corrwith_index_union(self):
|
||||
df1 = DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
|
||||
df2 = DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
|
||||
|
||||
result = df1.corrwith(df2, drop=False).index.sort_values()
|
||||
expected = df1.columns.union(df2.columns).sort_values()
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_corrwith_dup_cols(self):
|
||||
# GH#21925
|
||||
df1 = DataFrame(np.vstack([np.arange(10)] * 3).T)
|
||||
df2 = df1.copy()
|
||||
df2 = pd.concat((df2, df2[0]), axis=1)
|
||||
|
||||
result = df1.corrwith(df2)
|
||||
expected = Series(np.ones(4), index=[0, 0, 1, 2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_corr_numerical_instabilities(self):
|
||||
# GH#45640
|
||||
df = DataFrame([[0.2, 0.4], [0.4, 0.2]])
|
||||
result = df.corr()
|
||||
expected = DataFrame({0: [1.0, -1.0], 1: [-1.0, 1.0]})
|
||||
tm.assert_frame_equal(result - 1, expected - 1, atol=1e-17)
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
def test_corrwith_spearman(self):
|
||||
# GH#21925
|
||||
df = DataFrame(np.random.random(size=(100, 3)))
|
||||
result = df.corrwith(df**2, method="spearman")
|
||||
expected = Series(np.ones(len(result)))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@td.skip_if_no_scipy
|
||||
def test_corrwith_kendall(self):
|
||||
# GH#21925
|
||||
df = DataFrame(np.random.random(size=(100, 3)))
|
||||
result = df.corrwith(df**2, method="kendall")
|
||||
expected = Series(np.ones(len(result)))
|
||||
tm.assert_series_equal(result, expected)
|
||||
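Finally, a compact sketch of corrwith with invented frames: it pairs up the matching columns of the two frames and returns one correlation per shared column.

import pandas as pd

df1 = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0], "b": [4.0, 3.0, 2.0, 1.0]})
df2 = pd.DataFrame({"a": [2.0, 4.0, 6.0, 8.0], "b": [1.0, 2.0, 3.0, 4.0]})

print(df1.corrwith(df2))
# a    1.0
# b   -1.0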
Some files were not shown because too many files have changed in this diff.