from datetime import timedelta
from polars.dataframe.group_by import DynamicGroupBy
from polars import DataFrame
from polars._typing import IntoExpr, StartBy, ClosedInterval, Label
from ...misc import ArgRepr
from ..types import IntoExprs
[docs]
class GroupByDynamic(ArgRepr):
    """Partial of the polars dataframe `group_by_dynamic <dyngrp_>`__ method.

    All arguments are cached at instantiation and forwarded to the
    dataframe's ``group_by_dynamic`` method when the (callable) object is
    called with a dataframe.

    Parameters
    ----------
    index_column: IntoExpr
        Column used to group based on the time window. Often of type Date or
        Datetime. This column must be sorted in ascending order (or, if
        `group_by` is specified, then it must be sorted in ascending order
        within each group). In case of a dynamic group by on indices, dtype
        needs to be Int32 or Int64. Note that Int32 gets temporarily cast to
        Int64, so if performance matters use an Int64 column.
    every: str or timedelta
        Interval of the window. Suffix string of integer number with the
        letter "i" to indicate indexing by integer columns.
    period: str or timedelta, optional
        Length of the window. Equals `every` if set to ``None`` (the default).
    offset: str or timedelta, optional
        Offset of the window. Does not take effect if `start_by` is
        "datapoint". Defaults to ``None``, meaning zero offset.
    include_boundaries: bool, optional
        Add the lower and upper bound of the window to the "_lower_boundary"
        and "_upper_boundary" columns. This will impact performance because it
        is harder to parallelize. Defaults to ``False``.
    closed: "left", "right", "both", "none"
        Define which sides of the temporal interval are closed (inclusive).
        Defaults to "left".
    label: "left", "right", "datapoint"
        Which label to use for the window, lower boundary, upper boundary, or
        first value of the index column in the given window. If you don't need
        the label to be at one of the boundaries, choose this option for
        maximum performance. Defaults to "left".
    group_by: IntoExpr, optional
        Also group by this column/these columns. Defaults to ``None``.
    start_by: "window", "datapoint", "monday", "tuesday", ...
        The strategy to determine the start of the first window by, where
        "window" takes the earliest timestamp, truncates it with `every`, and
        then adds `offset`. Weekly windows start on Monday. "datapoint" starts
        from the first encountered data point, whereas any day of the week
        starts the window at the weekday before the first data point.
        The resulting window is then shifted back until the earliest datapoint
        is in or in front of it. Defaults to "window".

    .. _dyngrp: https://docs.pola.rs/api/python/stable/reference/dataframe/api/
       polars.DataFrame.group_by_dynamic.html

    """

    def __init__(
            self,
            index_column: IntoExpr,
            every: str | timedelta,
            period: str | timedelta | None = None,
            offset: str | timedelta | None = None,
            include_boundaries: bool = False,
            closed: ClosedInterval = 'left',
            label: Label = 'left',
            group_by: IntoExprs | None = None,
            start_by: StartBy = 'window',
    ) -> None:
        self.index_column = index_column
        self.every = every
        self.period = period
        self.offset = offset
        self.include_boundaries = include_boundaries
        # String options are stripped of surrounding whitespace and
        # lower-cased before they are cached and passed on.
        self.closed = closed.strip().lower()
        self.label = label.strip().lower()
        self.group_by = group_by
        self.start_by = start_by.strip().lower()
        # Hand every cached argument to the parent class, including
        # `include_boundaries`, so none of them is left out.
        super().__init__(
            index_column,
            every=every,
            period=period,
            offset=offset,
            include_boundaries=include_boundaries,
            closed=self.closed,
            label=self.label,
            group_by=group_by,
            start_by=self.start_by,
        )

    def __call__(self, df: DataFrame) -> DynamicGroupBy:
        """Group a polars dataframe into dynamic windows.

        Parameters
        ----------
        df: DataFrame
            The dataframe to group into dynamic windows.

        Returns
        -------
        DynamicGroupBy
            The object returned by calling ``group_by_dynamic`` on `df` with
            the cached arguments. No aggregation is applied here.

        """
        return df.group_by_dynamic(
            self.index_column,
            every=self.every,
            period=self.period,
            offset=self.offset,
            # Previously dropped, so requesting boundary columns had no effect.
            include_boundaries=self.include_boundaries,
            closed=self.closed,
            label=self.label,
            group_by=self.group_by,
            start_by=self.start_by,
        )