304 lines
		
	
	
		
			9.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			304 lines
		
	
	
		
			9.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import annotations
 | 
						|
 | 
						|
from typing import (
 | 
						|
    TYPE_CHECKING,
 | 
						|
    Iterable,
 | 
						|
    Literal,
 | 
						|
    cast,
 | 
						|
)
 | 
						|
 | 
						|
import numpy as np
 | 
						|
 | 
						|
from pandas._typing import PositionalIndexer
 | 
						|
from pandas.util._decorators import (
 | 
						|
    cache_readonly,
 | 
						|
    doc,
 | 
						|
)
 | 
						|
 | 
						|
from pandas.core.dtypes.common import (
 | 
						|
    is_integer,
 | 
						|
    is_list_like,
 | 
						|
)
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from pandas import (
 | 
						|
        DataFrame,
 | 
						|
        Series,
 | 
						|
    )
 | 
						|
    from pandas.core.groupby import groupby
 | 
						|
 | 
						|
 | 
						|
class GroupByIndexingMixin:
 | 
						|
    """
 | 
						|
    Mixin for adding ._positional_selector to GroupBy.
 | 
						|
    """
 | 
						|
 | 
						|
    @cache_readonly
 | 
						|
    def _positional_selector(self) -> GroupByPositionalSelector:
 | 
						|
        """
 | 
						|
        Return positional selection for each group.
 | 
						|
 | 
						|
        ``groupby._positional_selector[i:j]`` is similar to
 | 
						|
        ``groupby.apply(lambda x: x.iloc[i:j])``
 | 
						|
        but much faster and preserves the original index and order.
 | 
						|
 | 
						|
        ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
 | 
						|
        and :meth:`~GroupBy.tail`. For example:
 | 
						|
 | 
						|
        - ``head(5)``
 | 
						|
        - ``_positional_selector[5:-5]``
 | 
						|
        - ``tail(5)``
 | 
						|
 | 
						|
        together return all the rows.
 | 
						|
 | 
						|
        Allowed inputs for the index are:
 | 
						|
 | 
						|
        - An integer valued iterable, e.g. ``range(2, 4)``.
 | 
						|
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.
 | 
						|
 | 
						|
        The output format is the same as :meth:`~GroupBy.head` and
 | 
						|
        :meth:`~GroupBy.tail`, namely
 | 
						|
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.
 | 
						|
 | 
						|
        Returns
 | 
						|
        -------
 | 
						|
        Series
 | 
						|
            The filtered subset of the original Series.
 | 
						|
        DataFrame
 | 
						|
            The filtered subset of the original DataFrame.
 | 
						|
 | 
						|
        See Also
 | 
						|
        --------
 | 
						|
        DataFrame.iloc : Purely integer-location based indexing for selection by
 | 
						|
            position.
 | 
						|
        GroupBy.head : Return first n rows of each group.
 | 
						|
        GroupBy.tail : Return last n rows of each group.
 | 
						|
        GroupBy.nth : Take the nth row from each group if n is an int, or a
 | 
						|
            subset of rows, if n is a list of ints.
 | 
						|
 | 
						|
        Notes
 | 
						|
        -----
 | 
						|
        - The slice step cannot be negative.
 | 
						|
        - If the index specification results in overlaps, the item is not duplicated.
 | 
						|
        - If the index specification changes the order of items, then
 | 
						|
          they are returned in their original order.
 | 
						|
          By contrast, ``DataFrame.iloc`` can change the row order.
 | 
						|
        - ``groupby()`` parameters such as as_index and dropna are ignored.
 | 
						|
 | 
						|
        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
 | 
						|
        with ``as_index=False`` are:
 | 
						|
 | 
						|
        - Input to ``_positional_selector`` can include
 | 
						|
          one or more slices whereas ``nth``
 | 
						|
          just handles an integer or a list of integers.
 | 
						|
        - ``_positional_selector`` can  accept a slice relative to the
 | 
						|
          last row of each group.
 | 
						|
        - ``_positional_selector`` does not have an equivalent to the
 | 
						|
          ``nth()`` ``dropna`` parameter.
 | 
						|
 | 
						|
        Examples
 | 
						|
        --------
 | 
						|
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
 | 
						|
        ...                   columns=["A", "B"])
 | 
						|
        >>> df.groupby("A")._positional_selector[1:2]
 | 
						|
           A  B
 | 
						|
        1  a  2
 | 
						|
        4  b  5
 | 
						|
 | 
						|
        >>> df.groupby("A")._positional_selector[1, -1]
 | 
						|
           A  B
 | 
						|
        1  a  2
 | 
						|
        2  a  3
 | 
						|
        4  b  5
 | 
						|
        """
 | 
						|
        if TYPE_CHECKING:
 | 
						|
            groupby_self = cast(groupby.GroupBy, self)
 | 
						|
        else:
 | 
						|
            groupby_self = self
 | 
						|
 | 
						|
        return GroupByPositionalSelector(groupby_self)
 | 
						|
 | 
						|
    def _make_mask_from_positional_indexer(
 | 
						|
        self,
 | 
						|
        arg: PositionalIndexer | tuple,
 | 
						|
    ) -> np.ndarray:
 | 
						|
        if is_list_like(arg):
 | 
						|
            if all(is_integer(i) for i in cast(Iterable, arg)):
 | 
						|
                mask = self._make_mask_from_list(cast(Iterable[int], arg))
 | 
						|
            else:
 | 
						|
                mask = self._make_mask_from_tuple(cast(tuple, arg))
 | 
						|
 | 
						|
        elif isinstance(arg, slice):
 | 
						|
            mask = self._make_mask_from_slice(arg)
 | 
						|
        elif is_integer(arg):
 | 
						|
            mask = self._make_mask_from_int(cast(int, arg))
 | 
						|
        else:
 | 
						|
            raise TypeError(
 | 
						|
                f"Invalid index {type(arg)}. "
 | 
						|
                "Must be integer, list-like, slice or a tuple of "
 | 
						|
                "integers and slices"
 | 
						|
            )
 | 
						|
 | 
						|
        if isinstance(mask, bool):
 | 
						|
            if mask:
 | 
						|
                mask = self._ascending_count >= 0
 | 
						|
            else:
 | 
						|
                mask = self._ascending_count < 0
 | 
						|
 | 
						|
        return cast(np.ndarray, mask)
 | 
						|
 | 
						|
    def _make_mask_from_int(self, arg: int) -> np.ndarray:
 | 
						|
        if arg >= 0:
 | 
						|
            return self._ascending_count == arg
 | 
						|
        else:
 | 
						|
            return self._descending_count == (-arg - 1)
 | 
						|
 | 
						|
    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
 | 
						|
        positive = [arg for arg in args if arg >= 0]
 | 
						|
        negative = [-arg - 1 for arg in args if arg < 0]
 | 
						|
 | 
						|
        mask: bool | np.ndarray = False
 | 
						|
 | 
						|
        if positive:
 | 
						|
            mask |= np.isin(self._ascending_count, positive)
 | 
						|
 | 
						|
        if negative:
 | 
						|
            mask |= np.isin(self._descending_count, negative)
 | 
						|
 | 
						|
        return mask
 | 
						|
 | 
						|
    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
 | 
						|
        mask: bool | np.ndarray = False
 | 
						|
 | 
						|
        for arg in args:
 | 
						|
            if is_integer(arg):
 | 
						|
                mask |= self._make_mask_from_int(cast(int, arg))
 | 
						|
            elif isinstance(arg, slice):
 | 
						|
                mask |= self._make_mask_from_slice(arg)
 | 
						|
            else:
 | 
						|
                raise ValueError(
 | 
						|
                    f"Invalid argument {type(arg)}. Should be int or slice."
 | 
						|
                )
 | 
						|
 | 
						|
        return mask
 | 
						|
 | 
						|
    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
 | 
						|
        start = arg.start
 | 
						|
        stop = arg.stop
 | 
						|
        step = arg.step
 | 
						|
 | 
						|
        if step is not None and step < 0:
 | 
						|
            raise ValueError(f"Invalid step {step}. Must be non-negative")
 | 
						|
 | 
						|
        mask: bool | np.ndarray = True
 | 
						|
 | 
						|
        if step is None:
 | 
						|
            step = 1
 | 
						|
 | 
						|
        if start is None:
 | 
						|
            if step > 1:
 | 
						|
                mask &= self._ascending_count % step == 0
 | 
						|
 | 
						|
        elif start >= 0:
 | 
						|
            mask &= self._ascending_count >= start
 | 
						|
 | 
						|
            if step > 1:
 | 
						|
                mask &= (self._ascending_count - start) % step == 0
 | 
						|
 | 
						|
        else:
 | 
						|
            mask &= self._descending_count < -start
 | 
						|
 | 
						|
            offset_array = self._descending_count + start + 1
 | 
						|
            limit_array = (
 | 
						|
                self._ascending_count + self._descending_count + (start + 1)
 | 
						|
            ) < 0
 | 
						|
            offset_array = np.where(limit_array, self._ascending_count, offset_array)
 | 
						|
 | 
						|
            mask &= offset_array % step == 0
 | 
						|
 | 
						|
        if stop is not None:
 | 
						|
            if stop >= 0:
 | 
						|
                mask &= self._ascending_count < stop
 | 
						|
            else:
 | 
						|
                mask &= self._descending_count >= -stop
 | 
						|
 | 
						|
        return mask
 | 
						|
 | 
						|
    @cache_readonly
 | 
						|
    def _ascending_count(self) -> np.ndarray:
 | 
						|
        if TYPE_CHECKING:
 | 
						|
            groupby_self = cast(groupby.GroupBy, self)
 | 
						|
        else:
 | 
						|
            groupby_self = self
 | 
						|
 | 
						|
        return groupby_self._cumcount_array()
 | 
						|
 | 
						|
    @cache_readonly
 | 
						|
    def _descending_count(self) -> np.ndarray:
 | 
						|
        if TYPE_CHECKING:
 | 
						|
            groupby_self = cast(groupby.GroupBy, self)
 | 
						|
        else:
 | 
						|
            groupby_self = self
 | 
						|
 | 
						|
        return groupby_self._cumcount_array(ascending=False)
 | 
						|
 | 
						|
 | 
						|
@doc(GroupByIndexingMixin._positional_selector)
 | 
						|
class GroupByPositionalSelector:
 | 
						|
    def __init__(self, groupby_object: groupby.GroupBy):
 | 
						|
        self.groupby_object = groupby_object
 | 
						|
 | 
						|
    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
 | 
						|
        """
 | 
						|
        Select by positional index per group.
 | 
						|
 | 
						|
        Implements GroupBy._positional_selector
 | 
						|
 | 
						|
        Parameters
 | 
						|
        ----------
 | 
						|
        arg : PositionalIndexer | tuple
 | 
						|
            Allowed values are:
 | 
						|
            - int
 | 
						|
            - int valued iterable such as list or range
 | 
						|
            - slice with step either None or positive
 | 
						|
            - tuple of integers and slices
 | 
						|
 | 
						|
        Returns
 | 
						|
        -------
 | 
						|
        Series
 | 
						|
            The filtered subset of the original groupby Series.
 | 
						|
        DataFrame
 | 
						|
            The filtered subset of the original groupby DataFrame.
 | 
						|
 | 
						|
        See Also
 | 
						|
        --------
 | 
						|
        DataFrame.iloc : Integer-location based indexing for selection by position.
 | 
						|
        GroupBy.head : Return first n rows of each group.
 | 
						|
        GroupBy.tail : Return last n rows of each group.
 | 
						|
        GroupBy._positional_selector : Return positional selection for each group.
 | 
						|
        GroupBy.nth : Take the nth row from each group if n is an int, or a
 | 
						|
            subset of rows, if n is a list of ints.
 | 
						|
        """
 | 
						|
        self.groupby_object._reset_group_selection()
 | 
						|
        mask = self.groupby_object._make_mask_from_positional_indexer(arg)
 | 
						|
        return self.groupby_object._mask_selected_obj(mask)
 | 
						|
 | 
						|
 | 
						|
class GroupByNthSelector:
 | 
						|
    """
 | 
						|
    Dynamically substituted for GroupBy.nth to enable both call and index
 | 
						|
    """
 | 
						|
 | 
						|
    def __init__(self, groupby_object: groupby.GroupBy):
 | 
						|
        self.groupby_object = groupby_object
 | 
						|
 | 
						|
    def __call__(
 | 
						|
        self,
 | 
						|
        n: PositionalIndexer | tuple,
 | 
						|
        dropna: Literal["any", "all", None] = None,
 | 
						|
    ) -> DataFrame | Series:
 | 
						|
        return self.groupby_object.nth_actual(n, dropna)
 | 
						|
 | 
						|
    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
 | 
						|
        return self.groupby_object.nth_actual(n)
 |