489 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			489 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import annotations
 | 
						|
 | 
						|
from typing import (
 | 
						|
    Callable,
 | 
						|
    Hashable,
 | 
						|
)
 | 
						|
import warnings
 | 
						|
 | 
						|
import numpy as np
 | 
						|
 | 
						|
from pandas._libs import (
 | 
						|
    index as libindex,
 | 
						|
    lib,
 | 
						|
)
 | 
						|
from pandas._typing import (
 | 
						|
    Dtype,
 | 
						|
    DtypeObj,
 | 
						|
    npt,
 | 
						|
)
 | 
						|
from pandas.util._decorators import (
 | 
						|
    cache_readonly,
 | 
						|
    doc,
 | 
						|
)
 | 
						|
from pandas.util._exceptions import find_stack_level
 | 
						|
 | 
						|
from pandas.core.dtypes.cast import astype_nansafe
 | 
						|
from pandas.core.dtypes.common import (
 | 
						|
    is_dtype_equal,
 | 
						|
    is_extension_array_dtype,
 | 
						|
    is_float,
 | 
						|
    is_float_dtype,
 | 
						|
    is_integer_dtype,
 | 
						|
    is_numeric_dtype,
 | 
						|
    is_scalar,
 | 
						|
    is_signed_integer_dtype,
 | 
						|
    is_unsigned_integer_dtype,
 | 
						|
    needs_i8_conversion,
 | 
						|
    pandas_dtype,
 | 
						|
)
 | 
						|
from pandas.core.dtypes.generic import ABCSeries
 | 
						|
 | 
						|
from pandas.core.indexes.base import (
 | 
						|
    Index,
 | 
						|
    maybe_extract_name,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
class NumericIndex(Index):
    """
    Immutable sequence used for indexing and alignment. The basic object
    storing axis labels for all pandas objects. NumericIndex is a special case
    of `Index` with purely numpy int/uint/float labels.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: None)
    copy : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    Index : The base pandas Index type.
    Int64Index : Index of purely int64 labels (deprecated).
    UInt64Index : Index of purely uint64 labels (deprecated).
    Float64Index : Index of purely float64 labels (deprecated).

    Notes
    -----
    A NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or
    float64/32/16 dtype. In particular, ``NumericIndex`` *can not* hold Pandas numeric
    dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
    """

    _typ = "numericindex"
    _values: np.ndarray
    # dtype substituted by ``_ensure_dtype`` when the caller passes none, and
    # forced on the subclasses that switch off the backward-compat flag below.
    _default_dtype: np.dtype | None = None
    # (predicate, human readable name) pair consumed by ``_validate_dtype``.
    _dtype_validation_metadata: tuple[Callable[..., bool], str] = (
        is_numeric_dtype,
        "numeric type",
    )
    _is_numeric_dtype = True
    _can_hold_strings = False
    # True: keep whatever numeric dtype was requested (see ``_ensure_dtype``).
    _is_backward_compat_public_numeric_index: bool = True

    # error: Signature of "_can_hold_na" incompatible with supertype "Index"
    @cache_readonly
    def _can_hold_na(self) -> bool:  # type: ignore[override]
        """Whether the index dtype is a float dtype."""
        return is_float_dtype(self.dtype)

    # Lookup table from concrete numpy dtype to the matching libindex engine.
    _engine_types: dict[np.dtype, type[libindex.IndexEngine]] = {
        np.dtype(np.int8): libindex.Int8Engine,
        np.dtype(np.int16): libindex.Int16Engine,
        np.dtype(np.int32): libindex.Int32Engine,
        np.dtype(np.int64): libindex.Int64Engine,
        np.dtype(np.uint8): libindex.UInt8Engine,
        np.dtype(np.uint16): libindex.UInt16Engine,
        np.dtype(np.uint32): libindex.UInt32Engine,
        np.dtype(np.uint64): libindex.UInt64Engine,
        np.dtype(np.float32): libindex.Float32Engine,
        np.dtype(np.float64): libindex.Float64Engine,
    }

    @property
    def _engine_type(self):
        """Engine class registered in ``_engine_types`` for ``self.dtype``."""
        # error: Invalid index type "Union[dtype[Any], ExtensionDtype]" for
        # "Dict[dtype[Any], Type[IndexEngine]]"; expected type "dtype[Any]"
        return self._engine_types[self.dtype]  # type: ignore[index]

    @cache_readonly
    def inferred_type(self) -> str:
        """Return ``"integer"`` or ``"floating"`` based on the dtype kind."""
        return {
            "i": "integer",
            "u": "integer",
            "f": "floating",
        }[self.dtype.kind]

    def __new__(cls, data=None, dtype: Dtype | None = None, copy=False, name=None):
        name = maybe_extract_name(name, data, cls)

        # All validation / coercion of the raw input happens in _ensure_array.
        subarr = cls._ensure_array(data, dtype, copy)
        return cls._simple_new(subarr, name=name)

    @classmethod
    def _ensure_array(cls, data, dtype, copy: bool):
        """
        Ensure we have a valid array to pass to _simple_new.

        Parameters
        ----------
        data : array-like
            Input labels. Anything that is not already an ndarray or Index is
            coerced with ``np.asarray``.
        dtype : dtype or None
            Requested dtype; checked with ``_validate_dtype`` and resolved
            with ``_ensure_dtype``.
        copy : bool
            Whether to force a copy of ``data``.

        Returns
        -------
        np.ndarray

        Raises
        ------
        TypeError
            If casting to an integer dtype changes the values
            (see ``_assert_safe_casting``).
        ValueError
            If ``dtype`` fails validation, if ``data`` cannot be converted to
            the target dtype, or if the result is not 1-dimensional.
        """
        cls._validate_dtype(dtype)

        if not isinstance(data, (np.ndarray, Index)):
            # Coerce to ndarray if not already ndarray or Index
            if is_scalar(data):
                raise cls._scalar_data_error(data)

            # other iterable of some kind
            if not isinstance(data, (ABCSeries, list, tuple)):
                data = list(data)

            orig = data
            data = np.asarray(data, dtype=dtype)
            if dtype is None and data.dtype.kind == "f":
                if cls is UInt64Index and (data >= 0).all():
                    # https://github.com/numpy/numpy/issues/19146
                    data = np.asarray(orig, dtype=np.uint64)

        if issubclass(data.dtype.type, str):
            # NOTE(review): no ``raise`` here, so this presumably raises on its
            # own -- confirm against Index._string_data_error.
            cls._string_data_error(data)

        dtype = cls._ensure_dtype(dtype)

        if copy or not is_dtype_equal(data.dtype, dtype):
            # TODO: the try/except below is because it's difficult to predict the error
            # and/or error message from different combinations of data and dtype.
            # Efforts to avoid this try/except welcome.
            # See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222
            try:
                if copy:
                    subarr = np.array(data, dtype=dtype, copy=True)
                else:
                    # np.asarray copies only when required.  Passing copy=False
                    # to np.array means "never copy" on NumPy >= 2 and raises
                    # whenever a conversion is needed.
                    subarr = np.asarray(data, dtype=dtype)
                cls._validate_dtype(subarr.dtype)
            except (TypeError, ValueError) as err:
                # Chain the original error so the root cause stays visible.
                raise ValueError(
                    f"data is not compatible with {cls.__name__}"
                ) from err
            cls._assert_safe_casting(data, subarr)
        else:
            subarr = data

        if subarr.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")

        # ``subarr`` may still be an Index at this point; hand back an ndarray.
        subarr = np.asarray(subarr)
        return subarr

    @classmethod
    def _validate_dtype(cls, dtype: Dtype | None) -> None:
        """
        Raise ValueError if ``dtype`` is rejected by the class's dtype predicate.

        ``None`` is always accepted.
        """
        if dtype is None:
            return

        validation_func, expected = cls._dtype_validation_metadata
        if not validation_func(dtype):
            raise ValueError(
                f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
            )

    @classmethod
    def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
        """
        Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex.

        Assumes dtype has already been validated.
        """
        if dtype is None:
            return cls._default_dtype

        dtype = pandas_dtype(dtype)
        assert isinstance(dtype, np.dtype)

        if cls._is_backward_compat_public_numeric_index:
            # dtype for NumericIndex
            return dtype
        else:
            # dtype for Int64Index, UInt64Index etc. Needed for backwards compat.
            return cls._default_dtype

    def __contains__(self, key) -> bool:
        """
        Check if key is a float and has a decimal. If it has, return False.
        """
        if not is_integer_dtype(self.dtype):
            return super().__contains__(key)

        # Evaluated outside the try block, so an unhashable key propagates
        # its TypeError instead of being reported as "not contained".
        hash(key)
        try:
            if is_float(key) and int(key) != key:
                # otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
                return False
            return key in self._engine
        except (OverflowError, TypeError, ValueError):
            return False

    @doc(Index.astype)
    def astype(self, dtype, copy: bool = True):
        dtype = pandas_dtype(dtype)
        if is_float_dtype(self.dtype):
            if needs_i8_conversion(dtype):
                raise TypeError(
                    f"Cannot convert Float64Index to dtype {dtype}; integer "
                    "values are required for conversion"
                )
            elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
                # TODO(ExtensionIndex); this can change once we have an EA Index type
                # GH 13149
                arr = astype_nansafe(self._values, dtype=dtype)
                if isinstance(self, Float64Index):
                    return Int64Index(arr, name=self.name)
                else:
                    return NumericIndex(arr, name=self.name, dtype=dtype)
        elif self._is_backward_compat_public_numeric_index:
            # this block is needed so e.g. NumericIndex[int8].astype("int32") returns
            # NumericIndex[int32] and not Int64Index with dtype int64.
            # When Int64Index etc. are removed from the code base, removed this also.
            if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype):
                return self._constructor(self, dtype=dtype, copy=copy)

        return super().astype(dtype, copy=copy)

    # ----------------------------------------------------------------
    # Indexing Methods

    # error: Decorated property not supported
    @cache_readonly  # type: ignore[misc]
    @doc(Index._should_fallback_to_positional)
    def _should_fallback_to_positional(self) -> bool:
        return False

    @doc(Index._convert_slice_indexer)
    def _convert_slice_indexer(self, key: slice, kind: str):
        if is_float_dtype(self.dtype):
            assert kind in ["loc", "getitem"]

            # We always treat __getitem__ slicing as label-based
            # translate to locations
            return self.slice_indexer(key.start, key.stop, key.step)

        return super()._convert_slice_indexer(key, kind=kind)

    @doc(Index._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
        assert kind in ["loc", "getitem", None, lib.no_default]
        self._deprecated_arg(kind, "kind", "_maybe_cast_slice_bound")

        # we will try to coerce to integers
        return self._maybe_cast_indexer(label)

    # ----------------------------------------------------------------

    @doc(Index._shallow_copy)
    def _shallow_copy(self, values, name: Hashable = lib.no_default):
        if not self._can_hold_na and values.dtype.kind == "f":
            name = self._name if name is lib.no_default else name
            # Ensure we are not returning an Int64Index with float data:
            return Float64Index._simple_new(values, name=name)
        return super()._shallow_copy(values=values, name=name)

    def _convert_tolerance(self, tolerance, target):
        """
        Convert ``tolerance`` via the parent class and require it to be numeric.

        Raises
        ------
        ValueError
            If the converted tolerance does not have a numeric dtype.
        """
        tolerance = super()._convert_tolerance(tolerance, target)

        if not np.issubdtype(tolerance.dtype, np.number):
            if tolerance.ndim > 0:
                raise ValueError(
                    f"tolerance argument for {type(self).__name__} must contain "
                    "numeric elements if it is list type"
                )
            else:
                raise ValueError(
                    f"tolerance argument for {type(self).__name__} must be numeric "
                    f"if it is a scalar: {repr(tolerance)}"
                )
        return tolerance

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        # If we ever have BoolIndex or ComplexIndex, this may need to be tightened
        return is_numeric_dtype(dtype)

    @classmethod
    def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None:
        """
        Ensure incoming data can be represented with matching signed-ness.

        Needed if the process of casting data from some accepted dtype to the internal
        dtype(s) bears the risk of truncation (e.g. float to int).

        Raises
        ------
        TypeError
            If ``subarr`` has an integer dtype and is not element-wise equal
            to ``data``.
        """
        if is_integer_dtype(subarr.dtype) and not np.array_equal(data, subarr):
            raise TypeError("Unsafe NumPy casting, you must explicitly cast")

    @property
    def _is_all_dates(self) -> bool:
        """
        Checks that all the labels are datetime objects.
        """
        return False

    def _format_native_types(
        self, *, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
    ):
        """
        Format the values; float dtypes are rendered by ``FloatArrayFormatter``,
        everything else is delegated to the parent class.
        """
        # Imported inside the method in the original as well.
        from pandas.io.formats.format import FloatArrayFormatter

        if is_float_dtype(self.dtype):
            formatter = FloatArrayFormatter(
                self._values,
                na_rep=na_rep,
                float_format=float_format,
                decimal=decimal,
                quoting=quoting,
                fixed_width=False,
            )
            return formatter.get_result_as_array()

        return super()._format_native_types(
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            **kwargs,
        )
 | 
						|
 | 
						|
 | 
						|
_num_index_shared_docs = {}
 | 
						|
 | 
						|
 | 
						|
_num_index_shared_docs[
 | 
						|
    "class_descr"
 | 
						|
] = """
 | 
						|
    Immutable sequence used for indexing and alignment. The basic object
 | 
						|
    storing axis labels for all pandas objects. %(klass)s is a special case
 | 
						|
    of `Index` with purely %(ltype)s labels. %(extra)s.
 | 
						|
 | 
						|
    .. deprecated:: 1.4.0
 | 
						|
        In pandas v2.0 %(klass)s will be removed and :class:`NumericIndex` used instead.
 | 
						|
        %(klass)s will remain fully functional for the duration of pandas 1.x.
 | 
						|
 | 
						|
    Parameters
 | 
						|
    ----------
 | 
						|
    data : array-like (1-dimensional)
 | 
						|
    dtype : NumPy dtype (default: %(dtype)s)
 | 
						|
    copy : bool
 | 
						|
        Make a copy of input ndarray.
 | 
						|
    name : object
 | 
						|
        Name to be stored in the index.
 | 
						|
 | 
						|
    Attributes
 | 
						|
    ----------
 | 
						|
    None
 | 
						|
 | 
						|
    Methods
 | 
						|
    ----------
 | 
						|
    None
 | 
						|
 | 
						|
    See Also
 | 
						|
    --------
 | 
						|
    Index : The base pandas Index type.
 | 
						|
    NumericIndex : Index of numpy int/uint/float data.
 | 
						|
 | 
						|
    Notes
 | 
						|
    -----
 | 
						|
    An Index instance can **only** contain hashable objects.
 | 
						|
"""
 | 
						|
 | 
						|
 | 
						|
class IntegerIndex(NumericIndex):
    """
    This is an abstract class for Int64Index, UInt64Index.
    """

    # Switched off so that ``_ensure_dtype`` always resolves to
    # ``_default_dtype`` for these legacy classes.
    _is_backward_compat_public_numeric_index: bool = False

    @property
    def asi8(self) -> npt.NDArray[np.int64]:
        """
        Deprecated: view of the underlying values as ``_default_dtype``.

        Emits a FutureWarning on every access.
        """
        # do not cache or you'll create a memory leak
        message = "Index.asi8 is deprecated and will be removed in a future version."
        warnings.warn(message, FutureWarning, stacklevel=find_stack_level())
        values = self._values
        return values.view(self._default_dtype)

    def _validate_fill_value(self, value):
        """
        Validate ``value`` via the parent class, then convert float-dtype
        input to the index dtype when the conversion is lossless.

        Raises
        ------
        TypeError
            If ``value`` has a float dtype and casting it to ``self.dtype``
            changes any element.
        """
        # e.g. np.array([1.0]) we want np.array([1], dtype=self.dtype)
        #  see TestSetitemFloatNDarrayIntoIntegerSeries
        super()._validate_fill_value(value)

        is_float_valued = hasattr(value, "dtype") and is_float_dtype(value.dtype)
        if not is_float_valued:
            return value

        as_index_dtype = value.astype(self.dtype)
        if not (as_index_dtype == value).all():
            raise TypeError
        # See also: can_hold_element
        return as_index_dtype
 | 
						|
 | 
						|
 | 
						|
class Int64Index(IntegerIndex):
    # Values substituted into the shared class docstring template.
    _index_descr_args = dict(
        klass="Int64Index",
        ltype="integer",
        dtype="int64",
        extra="",
    )
    __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args

    _typ = "int64index"
    _default_dtype = np.dtype(np.int64)
    _engine_type = libindex.Int64Engine
    # Only signed integer dtypes pass ``_validate_dtype`` for this class.
    _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer")
 | 
						|
 | 
						|
 | 
						|
class UInt64Index(IntegerIndex):
    # Values substituted into the shared class docstring template.
    _index_descr_args = dict(
        klass="UInt64Index",
        ltype="unsigned integer",
        dtype="uint64",
        extra="",
    )
    __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args

    _typ = "uint64index"
    _default_dtype = np.dtype(np.uint64)
    _engine_type = libindex.UInt64Engine
    # Only unsigned integer dtypes pass ``_validate_dtype`` for this class.
    _dtype_validation_metadata = (is_unsigned_integer_dtype, "unsigned integer")

    def _validate_fill_value(self, value):
        """
        Validate ``value`` via the parent class, then cast signed-integer
        input to the index dtype when every element is non-negative.

        Raises
        ------
        TypeError
            If ``value`` has a signed integer dtype and contains a negative
            element.
        """
        # e.g. np.array([1]) we want np.array([1], dtype=np.uint64)
        #  see test_where_uin64
        super()._validate_fill_value(value)

        is_signed_valued = hasattr(value, "dtype") and is_signed_integer_dtype(
            value.dtype
        )
        if not is_signed_valued:
            return value

        if not (value >= 0).all():
            raise TypeError
        return value.astype(self.dtype)
 | 
						|
 | 
						|
 | 
						|
class Float64Index(NumericIndex):
    # Values substituted into the shared class docstring template.
    _index_descr_args = dict(
        klass="Float64Index",
        dtype="float64",
        ltype="float",
        extra="",
    )
    __doc__ = _num_index_shared_docs["class_descr"] % _index_descr_args

    _typ = "float64index"
    _default_dtype = np.dtype(np.float64)
    _engine_type = libindex.Float64Engine
    # Only float dtypes pass ``_validate_dtype`` for this class.
    _dtype_validation_metadata = (is_float_dtype, "float")
    # Switched off so that ``_ensure_dtype`` always resolves to
    # ``_default_dtype`` for this legacy class.
    _is_backward_compat_public_numeric_index: bool = False
 |