# Source code for swak.pl.frame.groupby_dynamic

from datetime import timedelta
from polars.dataframe.group_by import DynamicGroupBy
from polars import DataFrame
from polars._typing import IntoExpr, StartBy, ClosedInterval, Label
from ...misc import ArgRepr
from ..types import IntoExprs



# Sphinx "[docs]" link marker (documentation rendering artifact).
class GroupByDynamic(ArgRepr):
    """Partial of the polars dataframe `group_by_dynamic <dyngrp_>`__ method.

    All options are fixed at instantiation. Calling the resulting object
    with a dataframe forwards them to that dataframe's ``group_by_dynamic``
    method.

    Parameters
    ----------
    index_column: IntoExpr
        Column used to group based on the time window. Often of type Date or
        Datetime. This column must be sorted in ascending order (or, if
        group_by is specified, then it must be sorted in ascending order
        within each group). In case of a dynamic group by on indices, dtype
        needs to be Int32 or Int64. Note that Int32 gets temporarily cast to
        Int64, so if performance matters use an Int64 column.
    every: str or timedelta
        Interval of the window. Suffix string of integer number with the
        letter "i" to indicate indexing by integer columns.
    period: str or timedelta, optional
        Length of the window. Equals `every` if set to ``None`` (the default).
    offset: str or timedelta, optional
        Offset of the window. Does not take effect if `start_by` is
        "datapoint". Defaults to ``None``, which is forwarded to polars
        unchanged and means no offset.
    include_boundaries: bool, optional
        Add the lower and upper bound of the window to the "_lower_boundary"
        and "_upper_boundary" columns. This will impact performance because it
        is harder to parallelize. Defaults to ``False``.
    closed: "left", "right", "both", "none"
        Define which sides of the temporal interval are closed (inclusive).
        Surrounding whitespace is stripped and the value is lower-cased.
        Defaults to "left".
    label: "left", "right", "datapoint"
        Which label to use for the window, lower boundary, upper boundary, or
        first value of the index column in the given window. If you don't need
        the label to be at one of the boundaries, choose this option for
        maximum performance. Surrounding whitespace is stripped and the value
        is lower-cased. Defaults to "left".
    group_by: IntoExprs, optional
        Also group by this column/these columns. Defaults to ``None``.
    start_by: "window", "datapoint", "monday", "tuesday", ...
        The strategy to determine the start of the first window by, where
        "window" takes the earliest timestamp, truncates it with `every`, and
        then adds `offset`. Weekly windows start on Monday. "datapoint" starts
        from the first encountered data point, whereas any day of the week
        starts the window at the weekday before the first data point.
        The resulting window is then shifted back until the earliest datapoint
        is in or in front of it. Surrounding whitespace is stripped and the
        value is lower-cased. Defaults to "window".


    .. _dyngrp: https://docs.pola.rs/api/python/stable/reference/dataframe/api/
                polars.DataFrame.group_by_dynamic.html

    """

    def __init__(
            self,
            index_column: IntoExpr,
            every: str | timedelta,
            period: str | timedelta | None = None,
            offset: str | timedelta | None = None,
            include_boundaries: bool = False,
            closed: ClosedInterval = 'left',
            label: Label = 'left',
            group_by: IntoExprs | None = None,
            start_by: StartBy = 'window',
    ) -> None:
        self.index_column = index_column
        self.every = every
        self.period = period
        self.offset = offset
        self.include_boundaries = include_boundaries
        # Normalize the string-valued options so that, e.g., " Left " is
        # accepted and stored (and represented) as "left".
        self.closed = closed.strip().lower()
        self.label = label.strip().lower()
        self.group_by = group_by
        self.start_by = start_by.strip().lower()
        # Hand every configured option to the parent so that the
        # representation reflects the complete configuration.
        super().__init__(
            index_column,
            every=every,
            period=period,
            offset=offset,
            include_boundaries=include_boundaries,
            closed=self.closed,
            label=self.label,
            group_by=group_by,
            start_by=self.start_by,
        )

    def __call__(self, df: DataFrame) -> DynamicGroupBy:
        """Group a polars dataframe into dynamic windows.

        Parameters
        ----------
        df: DataFrame
            The dataframe to group dynamically.

        Returns
        -------
        DynamicGroupBy
            The object returned by the dataframe's ``group_by_dynamic``
            method. No aggregation has been performed at this point;
            typically, its ``agg`` method is called next.

        """
        return df.group_by_dynamic(
            self.index_column,
            every=self.every,
            period=self.period,
            offset=self.offset,
            # Must be forwarded explicitly; otherwise the option set at
            # instantiation would silently have no effect.
            include_boundaries=self.include_boundaries,
            closed=self.closed,
            label=self.label,
            group_by=self.group_by,
            start_by=self.start_by,
        )