1269 lines
		
	
	
		
			42 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1269 lines
		
	
	
		
			42 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import annotations
 | 
						||
 | 
						||
from collections import abc
 | 
						||
from datetime import datetime
 | 
						||
from functools import partial
 | 
						||
from itertools import islice
 | 
						||
from typing import (
 | 
						||
    TYPE_CHECKING,
 | 
						||
    Callable,
 | 
						||
    Hashable,
 | 
						||
    List,
 | 
						||
    Tuple,
 | 
						||
    TypeVar,
 | 
						||
    Union,
 | 
						||
    overload,
 | 
						||
)
 | 
						||
import warnings
 | 
						||
 | 
						||
import numpy as np
 | 
						||
 | 
						||
from pandas._libs import tslib
 | 
						||
from pandas._libs.tslibs import (
 | 
						||
    OutOfBoundsDatetime,
 | 
						||
    Timedelta,
 | 
						||
    Timestamp,
 | 
						||
    conversion,
 | 
						||
    iNaT,
 | 
						||
    nat_strings,
 | 
						||
    parsing,
 | 
						||
)
 | 
						||
from pandas._libs.tslibs.parsing import (  # noqa:F401
 | 
						||
    DateParseError,
 | 
						||
    format_is_iso,
 | 
						||
    guess_datetime_format,
 | 
						||
)
 | 
						||
from pandas._libs.tslibs.strptime import array_strptime
 | 
						||
from pandas._typing import (
 | 
						||
    AnyArrayLike,
 | 
						||
    ArrayLike,
 | 
						||
    Timezone,
 | 
						||
)
 | 
						||
from pandas.util._exceptions import find_stack_level
 | 
						||
 | 
						||
from pandas.core.dtypes.common import (
 | 
						||
    ensure_object,
 | 
						||
    is_datetime64_dtype,
 | 
						||
    is_datetime64_ns_dtype,
 | 
						||
    is_datetime64tz_dtype,
 | 
						||
    is_float,
 | 
						||
    is_integer,
 | 
						||
    is_integer_dtype,
 | 
						||
    is_list_like,
 | 
						||
    is_numeric_dtype,
 | 
						||
    is_scalar,
 | 
						||
)
 | 
						||
from pandas.core.dtypes.generic import (
 | 
						||
    ABCDataFrame,
 | 
						||
    ABCSeries,
 | 
						||
)
 | 
						||
from pandas.core.dtypes.missing import notna
 | 
						||
 | 
						||
from pandas.arrays import (
 | 
						||
    DatetimeArray,
 | 
						||
    IntegerArray,
 | 
						||
)
 | 
						||
from pandas.core import algorithms
 | 
						||
from pandas.core.algorithms import unique
 | 
						||
from pandas.core.arrays.datetimes import (
 | 
						||
    maybe_convert_dtype,
 | 
						||
    objects_to_datetime64ns,
 | 
						||
    tz_to_dtype,
 | 
						||
)
 | 
						||
from pandas.core.construction import extract_array
 | 
						||
from pandas.core.indexes.base import Index
 | 
						||
from pandas.core.indexes.datetimes import DatetimeIndex
 | 
						||
 | 
						||
if TYPE_CHECKING:
 | 
						||
    from pandas._libs.tslibs.nattype import NaTType
 | 
						||
 | 
						||
    from pandas import Series
 | 
						||
 | 
						||
# ---------------------------------------------------------------------
# types used in annotations

# Anything acceptable as a 1-D collection of dates.
ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
# Scalar types accepted by to_datetime besides datetime itself.
Scalar = Union[int, float, str]
# A single datetime-like scalar input.
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)

DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]

# Minimum input length before caching of converted dates is considered
# (see should_cache below).
start_caching_at = 50
 | 
						||
 | 
						||
 | 
						||
# ---------------------------------------------------------------------
 | 
						||
 | 
						||
 | 
						||
def _guess_datetime_format_for_array(arr, **kwargs):
 | 
						||
    # Try to guess the format based on the first non-NaN element
 | 
						||
    non_nan_elements = notna(arr).nonzero()[0]
 | 
						||
    if len(non_nan_elements):
 | 
						||
        return guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
 | 
						||
 | 
						||
 | 
						||
def should_cache(
    arg: ArrayConvertible, unique_share: float = 0.7, check_count: int | None = None
) -> bool:
    """
    Decide whether caching the date conversion is worthwhile.

    Caching pays off when, among the first ``check_count`` elements, the
    share of unique values is at most ``unique_share``.

    Parameters
    ----------
    arg : listlike, tuple, 1-d array, Series
    unique_share : float, default 0.7
        Must satisfy 0 < unique_share < 1.
    check_count : int, optional
        Must satisfy 0 <= check_count <= len(arg).

    Returns
    -------
    bool

    Notes
    -----
    Sequences no longer than ``start_caching_at`` (50) are never cached.
    Up to 5000 elements, ten percent are sampled; beyond that, only the
    first 500 elements are checked. All constants were chosen
    empirically.
    """
    if check_count is None:
        # default realization: derive the sample size from the length
        if len(arg) <= start_caching_at:
            # in this case, the gain from caching is negligible
            return False
        check_count = len(arg) // 10 if len(arg) <= 5000 else 500
    else:
        assert (
            0 <= check_count <= len(arg)
        ), "check_count must be in next bounds: [0; len(arg)]"
        if check_count == 0:
            return False

    assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)"

    try:
        # We can't cache if the items are not hashable.
        sampled_unique = set(islice(arg, check_count))
    except TypeError:
        return False
    return len(sampled_unique) <= check_count * unique_share
 | 
						||
 | 
						||
 | 
						||
def _maybe_cache(
 | 
						||
    arg: ArrayConvertible,
 | 
						||
    format: str | None,
 | 
						||
    cache: bool,
 | 
						||
    convert_listlike: Callable,
 | 
						||
) -> Series:
 | 
						||
    """
 | 
						||
    Create a cache of unique dates from an array of dates
 | 
						||
 | 
						||
    Parameters
 | 
						||
    ----------
 | 
						||
    arg : listlike, tuple, 1-d array, Series
 | 
						||
    format : string
 | 
						||
        Strftime format to parse time
 | 
						||
    cache : bool
 | 
						||
        True attempts to create a cache of converted values
 | 
						||
    convert_listlike : function
 | 
						||
        Conversion function to apply on dates
 | 
						||
 | 
						||
    Returns
 | 
						||
    -------
 | 
						||
    cache_array : Series
 | 
						||
        Cache of converted, unique dates. Can be empty
 | 
						||
    """
 | 
						||
    from pandas import Series
 | 
						||
 | 
						||
    cache_array = Series(dtype=object)
 | 
						||
 | 
						||
    if cache:
 | 
						||
        # Perform a quicker unique check
 | 
						||
        if not should_cache(arg):
 | 
						||
            return cache_array
 | 
						||
 | 
						||
        unique_dates = unique(arg)
 | 
						||
        if len(unique_dates) < len(arg):
 | 
						||
            cache_dates = convert_listlike(unique_dates, format)
 | 
						||
            cache_array = Series(cache_dates, index=unique_dates)
 | 
						||
            # GH#39882 and GH#35888 in case of None and NaT we get duplicates
 | 
						||
            if not cache_array.index.is_unique:
 | 
						||
                cache_array = cache_array[~cache_array.index.duplicated()]
 | 
						||
    return cache_array
 | 
						||
 | 
						||
 | 
						||
def _box_as_indexlike(
 | 
						||
    dt_array: ArrayLike, utc: bool | None = None, name: Hashable = None
 | 
						||
) -> Index:
 | 
						||
    """
 | 
						||
    Properly boxes the ndarray of datetimes to DatetimeIndex
 | 
						||
    if it is possible or to generic Index instead
 | 
						||
 | 
						||
    Parameters
 | 
						||
    ----------
 | 
						||
    dt_array: 1-d array
 | 
						||
        Array of datetimes to be wrapped in an Index.
 | 
						||
    tz : object
 | 
						||
        None or 'utc'
 | 
						||
    name : string, default None
 | 
						||
        Name for a resulting index
 | 
						||
 | 
						||
    Returns
 | 
						||
    -------
 | 
						||
    result : datetime of converted dates
 | 
						||
        - DatetimeIndex if convertible to sole datetime64 type
 | 
						||
        - general Index otherwise
 | 
						||
    """
 | 
						||
 | 
						||
    if is_datetime64_dtype(dt_array):
 | 
						||
        tz = "utc" if utc else None
 | 
						||
        return DatetimeIndex(dt_array, tz=tz, name=name)
 | 
						||
    return Index(dt_array, name=name, dtype=dt_array.dtype)
 | 
						||
 | 
						||
 | 
						||
def _convert_and_box_cache(
 | 
						||
    arg: DatetimeScalarOrArrayConvertible,
 | 
						||
    cache_array: Series,
 | 
						||
    name: str | None = None,
 | 
						||
) -> Index:
 | 
						||
    """
 | 
						||
    Convert array of dates with a cache and wrap the result in an Index.
 | 
						||
 | 
						||
    Parameters
 | 
						||
    ----------
 | 
						||
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
 | 
						||
    cache_array : Series
 | 
						||
        Cache of converted, unique dates
 | 
						||
    name : string, default None
 | 
						||
        Name for a DatetimeIndex
 | 
						||
 | 
						||
    Returns
 | 
						||
    -------
 | 
						||
    result : Index-like of converted dates
 | 
						||
    """
 | 
						||
    from pandas import Series
 | 
						||
 | 
						||
    result = Series(arg).map(cache_array)
 | 
						||
    return _box_as_indexlike(result._values, utc=None, name=name)
 | 
						||
 | 
						||
 | 
						||
def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index:
 | 
						||
    """
 | 
						||
    Return results from array_strptime if a %z or %Z directive was passed.
 | 
						||
 | 
						||
    Parameters
 | 
						||
    ----------
 | 
						||
    result : ndarray[int64]
 | 
						||
        int64 date representations of the dates
 | 
						||
    timezones : ndarray
 | 
						||
        pytz timezone objects
 | 
						||
    tz : object
 | 
						||
        None or pytz timezone object
 | 
						||
    name : string, default None
 | 
						||
        Name for a DatetimeIndex
 | 
						||
 | 
						||
    Returns
 | 
						||
    -------
 | 
						||
    tz_result : Index-like of parsed dates with timezone
 | 
						||
    """
 | 
						||
    tz_results = np.array(
 | 
						||
        [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)]
 | 
						||
    )
 | 
						||
    if tz is not None:
 | 
						||
        # Convert to the same tz
 | 
						||
        tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results])
 | 
						||
 | 
						||
    return Index(tz_results, name=name)
 | 
						||
 | 
						||
 | 
						||
def _convert_listlike_datetimes(
    arg,
    format: str | None,
    name: Hashable = None,
    tz: Timezone | None = None,
    unit: str | None = None,
    errors: str = "raise",
    infer_datetime_format: bool = False,
    dayfirst: bool | None = None,
    yearfirst: bool | None = None,
    exact: bool = True,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : str or None
        strftime format to parse with; None means infer/fall through
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : str
        None or string of the frequency of the passed data
    errors : str
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : bool, default False
        inferring format behavior from to_datetime
    dayfirst : bool
        dayfirst parsing behavior from to_datetime
    yearfirst : bool
        yearfirst parsing behavior from to_datetime
    exact : bool, default True
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates
    """
    # Normalize plain python sequences to an object ndarray up front.
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable: already-datetime input needs no parsing
    if is_datetime64tz_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            # round-trip through naive to force a UTC localization
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # fall through to the generic parsing path below
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        # 'unit' means numeric epoch values; mutually exclusive with 'format'
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        return _to_datetime_with_unit(arg, unit, name, tz, errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        # unconvertible dtype: honor the requested error policy
        if errors == "coerce":
            npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(npvalues, name=name)
        elif errors == "ignore":
            idx = Index(arg, name=name)
            return idx
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    if format is not None:
        # strptime-based parsing; may return None to request fallback
        res = _to_datetime_with_format(
            arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
        )
        if res is not None:
            return res

    # reaching here means either no format, or an inferred format that failed
    assert format is None or infer_datetime_format
    utc = tz == "utc"
    result, tz_parsed = objects_to_datetime64ns(
        arg,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        utc=utc,
        errors=errors,
        require_iso8601=require_iso8601,
        allow_object=True,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
 | 
						||
 | 
						||
 | 
						||
def _array_strptime_with_fallback(
    arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: str,
    infer_datetime_format: bool,
) -> Index | None:
    """
    Call array_strptime, with fallback behavior depending on 'errors'.

    Returns an Index of the parsed values, or None to signal the caller
    to fall back to objects_to_datetime64ns (only possible when the
    format was inferred rather than user-specified).
    """
    utc = tz == "utc"

    try:
        result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors)
        if "%Z" in fmt or "%z" in fmt:
            # timezone directives need per-element localization
            return _return_parsed_timezone_results(result, timezones, tz, name)
    except OutOfBoundsDatetime:
        if errors == "raise":
            raise
        elif errors == "coerce":
            # replace the whole result with NaT values
            result = np.empty(arg.shape, dtype="M8[ns]")
            iresult = result.view("i8")
            iresult.fill(iNaT)
        else:
            # errors == "ignore": hand back the unparsed input
            result = arg
    except ValueError:
        # if fmt was inferred, try falling back
        # to array_to_datetime - terminate here
        # for specified formats
        if not infer_datetime_format:
            if errors == "raise":
                raise
            elif errors == "coerce":
                # replace the whole result with NaT values
                result = np.empty(arg.shape, dtype="M8[ns]")
                iresult = result.view("i8")
                iresult.fill(iNaT)
            else:
                result = arg
        else:
            # Indicates to the caller to fallback to objects_to_datetime64ns
            return None

    return _box_as_indexlike(result, utc=utc, name=name)
 | 
						||
 | 
						||
 | 
						||
def _to_datetime_with_format(
    arg,
    orig_arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: str,
    infer_datetime_format: bool,
) -> Index | None:
    """
    Try parsing with the given format, returning None on failure.

    None means the caller should fall back to objects_to_datetime64ns
    (see _array_strptime_with_fallback).
    """
    result = None
    try:
        # shortcut formatting here
        if fmt == "%Y%m%d":
            # pass orig_arg as float-dtype may have been converted to
            # datetime64[ns]
            orig_arg = ensure_object(orig_arg)
            try:
                # may return None without raising
                result = _attempt_YYYYMMDD(orig_arg, errors=errors)
            except (ValueError, TypeError, OutOfBoundsDatetime) as err:
                raise ValueError(
                    "cannot convert the input to '%Y%m%d' date format"
                ) from err
            if result is not None:
                utc = tz == "utc"
                return _box_as_indexlike(result, utc=utc, name=name)

        # fallback: general strptime-based parsing
        res = _array_strptime_with_fallback(
            arg, name, tz, fmt, exact, errors, infer_datetime_format
        )
        return res

    except ValueError as err:
        # Fallback to try to convert datetime objects if timezone-aware
        #  datetime objects are found without passing `utc=True`
        try:
            values, tz = conversion.datetime_to_datetime64(arg)
            dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
            return DatetimeIndex._simple_new(dta, name=name)
        except (ValueError, TypeError):
            # re-raise the original parsing error, not the fallback's
            raise err
 | 
						||
 | 
						||
 | 
						||
def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index:
 | 
						||
    """
 | 
						||
    to_datetime specalized to the case where a 'unit' is passed.
 | 
						||
    """
 | 
						||
    arg = extract_array(arg, extract_numpy=True)
 | 
						||
 | 
						||
    # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
 | 
						||
    # because it expects an ndarray argument
 | 
						||
    if isinstance(arg, IntegerArray):
 | 
						||
        arr = arg.astype(f"datetime64[{unit}]")
 | 
						||
        tz_parsed = None
 | 
						||
    else:
 | 
						||
        arg = np.asarray(arg)
 | 
						||
        arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
 | 
						||
 | 
						||
    if errors == "ignore":
 | 
						||
        # Index constructor _may_ infer to DatetimeIndex
 | 
						||
        result = Index._with_infer(arr, name=name)
 | 
						||
    else:
 | 
						||
        result = DatetimeIndex(arr, name=name)
 | 
						||
 | 
						||
    if not isinstance(result, DatetimeIndex):
 | 
						||
        return result
 | 
						||
 | 
						||
    # GH#23758: We may still need to localize the result with tz
 | 
						||
    # GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
 | 
						||
    # result will be naive but in UTC
 | 
						||
    result = result.tz_localize("UTC").tz_convert(tz_parsed)
 | 
						||
 | 
						||
    if tz is not None:
 | 
						||
        if result.tz is None:
 | 
						||
            result = result.tz_localize(tz)
 | 
						||
        else:
 | 
						||
            result = result.tz_convert(tz)
 | 
						||
    return result
 | 
						||
 | 
						||
 | 
						||
def _adjust_to_origin(arg, origin, unit):
 | 
						||
    """
 | 
						||
    Helper function for to_datetime.
 | 
						||
    Adjust input argument to the specified origin
 | 
						||
 | 
						||
    Parameters
 | 
						||
    ----------
 | 
						||
    arg : list, tuple, ndarray, Series, Index
 | 
						||
        date to be adjusted
 | 
						||
    origin : 'julian' or Timestamp
 | 
						||
        origin offset for the arg
 | 
						||
    unit : str
 | 
						||
        passed unit from to_datetime, must be 'D'
 | 
						||
 | 
						||
    Returns
 | 
						||
    -------
 | 
						||
    ndarray or scalar of adjusted date(s)
 | 
						||
    """
 | 
						||
    if origin == "julian":
 | 
						||
        original = arg
 | 
						||
        j0 = Timestamp(0).to_julian_date()
 | 
						||
        if unit != "D":
 | 
						||
            raise ValueError("unit must be 'D' for origin='julian'")
 | 
						||
        try:
 | 
						||
            arg = arg - j0
 | 
						||
        except TypeError as err:
 | 
						||
            raise ValueError(
 | 
						||
                "incompatible 'arg' type for given 'origin'='julian'"
 | 
						||
            ) from err
 | 
						||
 | 
						||
        # preemptively check this for a nice range
 | 
						||
        j_max = Timestamp.max.to_julian_date() - j0
 | 
						||
        j_min = Timestamp.min.to_julian_date() - j0
 | 
						||
        if np.any(arg > j_max) or np.any(arg < j_min):
 | 
						||
            raise OutOfBoundsDatetime(
 | 
						||
                f"{original} is Out of Bounds for origin='julian'"
 | 
						||
            )
 | 
						||
    else:
 | 
						||
        # arg must be numeric
 | 
						||
        if not (
 | 
						||
            (is_scalar(arg) and (is_integer(arg) or is_float(arg)))
 | 
						||
            or is_numeric_dtype(np.asarray(arg))
 | 
						||
        ):
 | 
						||
            raise ValueError(
 | 
						||
                f"'{arg}' is not compatible with origin='{origin}'; "
 | 
						||
                "it must be numeric with a unit specified"
 | 
						||
            )
 | 
						||
 | 
						||
        # we are going to offset back to unix / epoch time
 | 
						||
        try:
 | 
						||
            offset = Timestamp(origin)
 | 
						||
        except OutOfBoundsDatetime as err:
 | 
						||
            raise OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") from err
 | 
						||
        except ValueError as err:
 | 
						||
            raise ValueError(
 | 
						||
                f"origin {origin} cannot be converted to a Timestamp"
 | 
						||
            ) from err
 | 
						||
 | 
						||
        if offset.tz is not None:
 | 
						||
            raise ValueError(f"origin offset {offset} must be tz-naive")
 | 
						||
        td_offset = offset - Timestamp(0)
 | 
						||
 | 
						||
        # convert the offset to the unit of the arg
 | 
						||
        # this should be lossless in terms of precision
 | 
						||
        ioffset = td_offset // Timedelta(1, unit=unit)
 | 
						||
 | 
						||
        # scalars & ndarray-like can handle the addition
 | 
						||
        if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)):
 | 
						||
            arg = np.asarray(arg)
 | 
						||
        arg = arg + ioffset
 | 
						||
    return arg
 | 
						||
 | 
						||
 | 
						||
@overload
def to_datetime(
    arg: DatetimeScalar,
    errors: str = ...,
    dayfirst: bool = ...,
    yearfirst: bool = ...,
    utc: bool | None = ...,
    format: str | None = ...,
    exact: bool = ...,
    unit: str | None = ...,
    infer_datetime_format: bool = ...,
    origin=...,
    cache: bool = ...,
) -> DatetimeScalar | NaTType:
    # overload: scalar input yields a scalar result (Timestamp or NaT)
    ...
 | 
						||
 | 
						||
 | 
						||
@overload
def to_datetime(
    arg: Series,
    errors: str = ...,
    dayfirst: bool = ...,
    yearfirst: bool = ...,
    utc: bool | None = ...,
    format: str | None = ...,
    exact: bool = ...,
    unit: str | None = ...,
    infer_datetime_format: bool = ...,
    origin=...,
    cache: bool = ...,
) -> Series:
    # overload: Series input yields a Series result
    ...
 | 
						||
 | 
						||
 | 
						||
@overload
def to_datetime(
    arg: list | tuple | np.ndarray,
    errors: str = ...,
    dayfirst: bool = ...,
    yearfirst: bool = ...,
    utc: bool | None = ...,
    format: str | None = ...,
    exact: bool = ...,
    unit: str | None = ...,
    infer_datetime_format: bool = ...,
    origin=...,
    cache: bool = ...,
) -> DatetimeIndex:
    # overload: list-like input yields a DatetimeIndex
    ...
 | 
						||
 | 
						||
 | 
						||
def to_datetime(
    arg: DatetimeScalarOrArrayConvertible,
    errors: str = "raise",
    dayfirst: bool = False,
    yearfirst: bool = False,
    utc: bool | None = None,
    format: str | None = None,
    exact: bool = True,
    unit: str | None = None,
    infer_datetime_format: bool = False,
    origin="unix",
    cache: bool = True,
) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None:
    """
    Convert argument to datetime.

    This function converts a scalar, array-like, :class:`Series` or
    :class:`DataFrame`/dict-like to a pandas datetime object.

    Parameters
    ----------
    arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like
        The object to convert to a datetime. A :class:`DataFrame` must
        provide at least the columns :const:`"year"`, :const:`"month"` and
        :const:`"day"`.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If :const:`'raise'`, then invalid parsing will raise an exception.
        - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`.
        - If :const:`'ignore'`, then invalid parsing will return the input.
    dayfirst : bool, default False
        Specify a date parse order if `arg` is str or is list-like. If
        :const:`True`, parse with the day first, e.g. :const:`"10/11/12"`
        is parsed as :const:`2012-11-10`.

        .. warning::

            ``dayfirst=True`` is not strict, but will prefer to parse
            with day first; a warning is shown when a delimited date string
            cannot be parsed in accordance with this option.

    yearfirst : bool, default False
        Specify a date parse order if `arg` is str or is list-like. If
        :const:`True`, parse with the year first, e.g. :const:`"10/11/12"`
        is parsed as :const:`2010-11-12`. When both `dayfirst` and
        `yearfirst` are :const:`True`, `yearfirst` is preceded (same as
        :mod:`dateutil`). Not strict.
    utc : bool, default None
        Control timezone-related parsing, localization and conversion.

        - If :const:`True`, the function *always* returns a timezone-aware
          UTC-localized result: timezone-naive inputs are *localized* as
          UTC, timezone-aware inputs are *converted* to UTC.
        - If :const:`False` (default), inputs are not coerced to UTC:
          naive inputs stay naive, aware inputs keep their time offsets.
          Limitations exist for mixed offsets (typically, daylight
          savings).
    format : str, default None
        The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. Note that
        :const:`"%f"` will parse all the way up to nanoseconds.
    exact : bool, default True
        Control how `format` is used:

        - If :const:`True`, require an exact `format` match.
        - If :const:`False`, allow the `format` to match anywhere in the
          target string.
    unit : str, default 'ns'
        The unit of the arg (D,s,ms,us,ns), an integer or float number
        counted from `origin`. E.g. with ``unit='ms'`` and
        ``origin='unix'`` (the default), this would calculate the number
        of milliseconds to the unix epoch start.
    infer_datetime_format : bool, default False
        If :const:`True` and no `format` is given, attempt to infer the
        format of the datetime strings based on the first non-NaN element,
        and if it can be inferred, switch to a faster method of parsing
        them (can increase the parsing speed by ~5-10x).
    origin : scalar, default 'unix'
        Define the reference date; numeric values are parsed as number of
        units (defined by `unit`) since this reference date.

        - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01.
        - If :const:`'julian'`, unit must be :const:`'D'`, and origin is
          set to beginning of Julian Calendar (day 0 starts at noon on
          January 1, 4713 BC).
        - If Timestamp convertible, origin is set to the Timestamp
          identified by origin.
    cache : bool, default True
        If :const:`True`, use a cache of unique, converted dates to apply
        the datetime conversion; may produce significant speed-ups when
        parsing duplicate date strings. The cache is only used when there
        are at least 50 values; out-of-bounds values render it unusable.

        .. versionchanged:: 0.25.0
            changed default value from :const:`False` to :const:`True`.

    Returns
    -------
    datetime
        If parsing succeeded. Return type depends on input (fallback type
        in parenthesis, used on unsuccessful timezone or out-of-range
        timestamp parsing):

        - scalar: :class:`Timestamp` (or :class:`datetime.datetime`)
        - array-like: :class:`DatetimeIndex` (or :class:`Series` with
          :class:`object` dtype containing :class:`datetime.datetime`)
        - Series / DataFrame: :class:`Series` of :class:`datetime64` dtype
          (or :class:`Series` of :class:`object` dtype containing
          :class:`datetime.datetime`)

        A :const:`None` input returns :const:`None`.

    Raises
    ------
    ParserError
        When parsing a date from string fails.
    ValueError
        When another datetime conversion error happens, e.g. when one of
        'year', 'month', 'day' columns is missing in a :class:`DataFrame`,
        or when a timezone-aware :class:`datetime.datetime` is found in an
        array-like of mixed time offsets, and ``utc=False``.

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_timedelta : Convert argument to timedelta.
    convert_dtypes : Convert dtypes.

    Examples
    --------
    Assembling a datetime from multiple columns of a :class:`DataFrame`:

    >>> df = pd.DataFrame({'year': [2015, 2016],
    ...                    'month': [2, 3],
    ...                    'day': [4, 5]})
    >>> pd.to_datetime(df)
    0   2015-02-04
    1   2016-03-05
    dtype: datetime64[ns]

    Using a unix epoch time:

    >>> pd.to_datetime(1490195805, unit='s')
    Timestamp('2017-03-22 15:16:45')

    Using a non-unix epoch origin:

    >>> pd.to_datetime([1, 2, 3], unit='D',
    ...                origin=pd.Timestamp('1960-01-01'))
    DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'],
                  dtype='datetime64[ns]', freq=None)

    Non-convertible date/times:

    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
    datetime.datetime(1300, 1, 1, 0, 0)
    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
    NaT

    Timezone-naive inputs are localized as UTC with ``utc=True``:

    >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True)
    DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'],
                  dtype='datetime64[ns, UTC]', freq=None)
    """
    if arg is None:
        return None

    # numeric values are offsets from `origin`; rebase them onto unix first
    if origin != "unix":
        arg = _adjust_to_origin(arg, origin, unit)

    tz = "utc" if utc else None
    convert_listlike = partial(
        _convert_listlike_datetimes,
        tz=tz,
        unit=unit,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        errors=errors,
        exact=exact,
        infer_datetime_format=infer_datetime_format,
    )

    result: Timestamp | NaTType | Series | Index

    if isinstance(arg, Timestamp):
        # already a Timestamp: pass through, localizing/converting to UTC
        # when requested
        if tz is None:
            result = arg
        elif arg.tz is not None:
            result = arg.tz_convert(tz)
        else:
            result = arg.tz_localize(tz)
    elif isinstance(arg, ABCSeries):
        cached = _maybe_cache(arg, format, cache, convert_listlike)
        if cached.empty:
            values = convert_listlike(arg._values, format)
            result = arg._constructor(values, index=arg.index, name=arg.name)
        else:
            # map each element through the unique-value cache
            result = arg.map(cached)
    elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
        # assemble one datetime per row from year/month/day (+ optional units)
        result = _assemble_from_unit_mappings(arg, errors, tz)
    elif isinstance(arg, Index):
        cached = _maybe_cache(arg, format, cache, convert_listlike)
        if cached.empty:
            result = convert_listlike(arg, format, name=arg.name)
        else:
            result = _convert_and_box_cache(arg, cached, name=arg.name)
    elif is_list_like(arg):
        try:
            cached = _maybe_cache(arg, format, cache, convert_listlike)
        except OutOfBoundsDatetime:
            # caching attempts to create a DatetimeIndex, which may raise
            # an OOB. If that's the desired behavior, then just reraise...
            if errors == "raise":
                raise
            # ... otherwise, continue without the cache.
            from pandas import Series

            cached = Series([], dtype=object)  # just an empty array
        if cached.empty:
            result = convert_listlike(arg, format)
        else:
            result = _convert_and_box_cache(arg, cached)
    else:
        # remaining scalars: convert via a length-1 array and unbox
        result = convert_listlike(np.array([arg]), format)[0]

    #  error: Incompatible return value type (got "Union[Timestamp, NaTType,
    # Series, Index]", expected "Union[DatetimeIndex, Series, float, str,
    # NaTType, None]")
    return result  # type: ignore[return-value]


# mappings for assembling units
# Keys are the column/key spellings accepted by to_datetime for
# DataFrame/dict-like input (singular and plural forms); values are the
# canonical unit codes used by _assemble_from_unit_mappings.
_unit_map = {
    "year": "year",
    "years": "year",
    "month": "month",
    "months": "month",
    "day": "day",
    "days": "day",
    "hour": "h",
    "hours": "h",
    "minute": "m",
    "minutes": "m",
    "second": "s",
    "seconds": "s",
    "ms": "ms",
    "millisecond": "ms",
    "milliseconds": "ms",
    "us": "us",
    "microsecond": "us",
    "microseconds": "us",
    "ns": "ns",
    "nanosecond": "ns",
    "nanoseconds": "ns",
}


def _assemble_from_unit_mappings(arg, errors, tz):
    """
    Assemble the unit-specified fields from the arg (DataFrame).

    Return a Series for actual parsing.

    Parameters
    ----------
    arg : DataFrame
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'

        - If :const:`'raise'`, then invalid parsing will raise an exception
        - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`
        - If :const:`'ignore'`, then invalid parsing will return the input
    tz : None or 'utc'

    Returns
    -------
    Series
    """
    from pandas import (
        DataFrame,
        to_numeric,
        to_timedelta,
    )

    arg = DataFrame(arg)
    if not arg.columns.is_unique:
        raise ValueError("cannot assemble with duplicate keys")

    def _canonical(key):
        # translate a passed key into its canonical unit code; try the
        # exact spelling first because "m" is case significant, then fall
        # back to a case-insensitive match, else pass the key through
        if key in _unit_map:
            return _unit_map[key]
        if key.lower() in _unit_map:
            return _unit_map[key.lower()]
        return key

    unit = {k: _canonical(k) for k in arg.keys()}
    unit_rev = {v: k for k, v in unit.items()}

    # we require at least Ymd
    missing = sorted({"year", "month", "day"} - set(unit_rev.keys()))
    if missing:
        _required = ",".join(missing)
        raise ValueError(
            "to assemble mappings requires at least that "
            f"[year, month, day] be specified: [{_required}] is missing"
        )

    # keys we don't recognize
    unknown = sorted(set(unit_rev.keys()) - set(_unit_map.values()))
    if unknown:
        _excess = ",".join(unknown)
        raise ValueError(
            f"extra keys have been passed to the datetime assemblage: [{_excess}]"
        )

    def _coerce(values):
        # we allow coercion to if errors allows
        values = to_numeric(values, errors=errors)

        # prevent overflow in case of int8 or int16
        if is_integer_dtype(values):
            values = values.astype("int64", copy=False)
        return values

    # pack Y/m/d into a single YYYYMMDD integer per row, then parse it
    values = (
        _coerce(arg[unit_rev["year"]]) * 10000
        + _coerce(arg[unit_rev["month"]]) * 100
        + _coerce(arg[unit_rev["day"]])
    )
    try:
        values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz)
    except (TypeError, ValueError) as err:
        raise ValueError(f"cannot assemble the datetimes: {err}") from err

    # add any sub-day components as timedeltas
    for u in ["h", "m", "s", "ms", "us", "ns"]:
        value = unit_rev.get(u)
        if value is not None and value in arg:
            try:
                values += to_timedelta(_coerce(arg[value]), unit=u, errors=errors)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"cannot assemble the datetimes [{value}]: {err}"
                ) from err
    return values


def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> np.ndarray | None:
    """
    try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
    arg is a passed in as an object dtype, but could really be ints/strings
    with nan-like/or floats (e.g. with nan)

    Parameters
    ----------
    arg : np.ndarray[object]
    errors : {'raise','ignore','coerce'}

    Returns
    -------
    np.ndarray of datetime64[ns], or None if every parsing strategy below
    failed.
    """

    def calc(carg):
        # calculate the actual result
        # derive year/month/day fields arithmetically
        # (e.g. 20050103 -> 2005, 01, 03) and let tslib build datetime64s
        carg = carg.astype(object)
        parsed = parsing.try_parse_year_month_day(
            carg / 10000, carg / 100 % 100, carg % 100
        )
        return tslib.array_to_datetime(parsed, errors=errors)[0]

    def calc_with_mask(carg, mask):
        # parse only the entries selected by `mask`; the rest become NaT
        result = np.empty(carg.shape, dtype="M8[ns]")
        # `iresult` aliases `result`'s buffer, so writing iNaT through it
        # marks the unmasked datetime64 slots as NaT
        iresult = result.view("i8")
        iresult[~mask] = iNaT

        masked_result = calc(carg[mask].astype(np.float64).astype(np.int64))
        result[mask] = masked_result.astype("M8[ns]")
        return result

    # Three fallback strategies, attempted in order:
    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except (ValueError, OverflowError, TypeError):
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, notna(carg))
    except (ValueError, OverflowError, TypeError):
        pass

    # string with NaN-like
    try:
        # error: Argument 2 to "isin" has incompatible type "List[Any]"; expected
        # "Union[Union[ExtensionArray, ndarray], Index, Series]"
        mask = ~algorithms.isin(arg, list(nat_strings))  # type: ignore[arg-type]
        return calc_with_mask(arg, mask)
    except (ValueError, OverflowError, TypeError):
        pass

    return None


def to_time(arg, format=None, infer_time_format=False, errors="raise"):
    # GH#34145: deprecated alias kept for backward compatibility; the real
    # implementation now lives in pandas.core.tools.times.
    warnings.warn(
        "`to_time` has been moved, should be imported from pandas.core.tools.times. "
        "This alias will be removed in a future version.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    from pandas.core.tools.times import to_time as _to_time

    return _to_time(arg, format, infer_time_format, errors)