Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions lilio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
>>> calendar.show() # doctest: +NORMALIZE_WHITESPACE
i_interval -1 1
anchor_year
2020 [2020-07-04, 2020-12-31) [2020-12-31, 2021-06-29)
2020 [2020-07-04, 2020-12-30] [2020-12-31, 2021-06-28]


Get the 180-day periods leading up to New Year's eve for 2020 - 2022 inclusive.

Expand All @@ -43,22 +44,21 @@
>>> calendar.show() # doctest: +NORMALIZE_WHITESPACE
i_interval -1 1
anchor_year
2022 [2022-07-04, 2022-12-31) [2022-12-31, 2023-06-29)
2021 [2021-07-04, 2021-12-31) [2021-12-31, 2022-06-29)
2020 [2020-07-04, 2020-12-31) [2020-12-31, 2021-06-29)
2022 [2022-07-04, 2022-12-30] [2022-12-31, 2023-06-28]
2021 [2021-07-04, 2021-12-30] [2021-12-31, 2022-06-28]
2020 [2020-07-04, 2020-12-30] [2020-12-31, 2021-06-28]

To get a stacked representation:

>>> calendar.map_years(2020, 2022).flat
anchor_year i_interval
2022 -1 [2022-07-04, 2022-12-31)
1 [2022-12-31, 2023-06-29)
2021 -1 [2021-07-04, 2021-12-31)
1 [2021-12-31, 2022-06-29)
2020 -1 [2020-07-04, 2020-12-31)
1 [2020-12-31, 2021-06-29)
2022 -1 [2022-07-04, 2022-12-30]
1 [2022-12-31, 2023-06-28]
2021 -1 [2021-07-04, 2021-12-30]
1 [2021-12-31, 2022-06-28]
2020 -1 [2020-07-04, 2020-12-30]
1 [2020-12-31, 2021-06-28]
dtype: interval

"""

import logging
Expand Down
1 change: 1 addition & 0 deletions lilio/_bokeh_plots.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Calendar plotting implementations (bokeh specific code)."""

import sys
import typing
import numpy as np
Expand Down
1 change: 1 addition & 0 deletions lilio/_plot.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Calendar plotting implementations (general and matplotlib)."""

import typing
import matplotlib.pyplot as plt
import numpy as np
Expand Down
18 changes: 10 additions & 8 deletions lilio/calendar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Lilio's main Calendar module."""

import copy
import re
import warnings
Expand Down Expand Up @@ -376,20 +377,20 @@ def _concatenate_periods(self, year, list_periods, is_target):
# loop through all the building blocks to construct the intervals
for block in list_periods:
left_date += block.gap_dateoffset
right_date = left_date + block.length_dateoffset
intervals.append(pd.Interval(left_date, right_date, closed="left"))
right_date = left_date + block.length_dateoffset - pd.Timedelta(days=1)
intervals.append(pd.Interval(left_date, right_date, closed="both"))
# update left date
left_date = right_date
left_date = right_date + pd.Timedelta(days=1)
else:
# build from right to left
right_date = self._get_anchor(year)
right_date = self._get_anchor(year) - pd.Timedelta(days=1)
# loop through all the building blocks to construct the intervals
for block in list_periods:
right_date -= block.gap_dateoffset
left_date = right_date - block.length_dateoffset
intervals.append(pd.Interval(left_date, right_date, closed="left"))
left_date = right_date - block.length_dateoffset + pd.Timedelta(days=1)
intervals.append(pd.Interval(left_date, right_date, closed="both"))
# update right date
right_date = left_date
right_date = left_date - pd.Timedelta(days=1)
Comment on lines 378 to +393
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the core adaptation to intervals that are closed on both sides.


return intervals

Expand Down Expand Up @@ -491,7 +492,8 @@ def _set_year_range_from_timestamps(self):
if self._map_year(max_year).iloc[0].right > self._last_timestamp:
max_year -= 1
# first date check
while self._map_year(min_year).iloc[-1].right <= self._first_timestamp:
# adaptation now that intervals are defined as closed="both".
while self._map_year(min_year).iloc[-1].right < self._first_timestamp:
Comment on lines +495 to +496
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the core adaptation to intervals that are closed on both sides.

min_year += 1

# map year(s) and generate year realized advent calendar
Expand Down
1 change: 1 addition & 0 deletions lilio/calendar_shifter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Calendar shifter to create staggered calendars."""

import copy
from typing import Union
import xarray as xr
Expand Down
1 change: 1 addition & 0 deletions lilio/calendar_shorthands.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Shorthands for calendars, to make generating commonly used calendars a one-liner."""

import re
import pandas as pd
from .calendar import Calendar
Expand Down
20 changes: 9 additions & 11 deletions lilio/resampling.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""The implementation of the resampling methods for use with the Calendar."""

import typing
from typing import Callable
from typing import Literal
Expand Down Expand Up @@ -244,20 +245,17 @@ def _resample_dataset(


@overload
def resample(calendar: Calendar, input_data: xr.Dataset) -> xr.Dataset:
...
def resample(calendar: Calendar, input_data: xr.Dataset) -> xr.Dataset: ...


@overload
def resample(calendar: Calendar, input_data: xr.DataArray) -> xr.DataArray:
...
def resample(calendar: Calendar, input_data: xr.DataArray) -> xr.DataArray: ...


@overload
def resample(
calendar: Calendar, input_data: Union[pd.Series, pd.DataFrame]
) -> pd.DataFrame:
...
) -> pd.DataFrame: ...


def resample(
Expand Down Expand Up @@ -326,11 +324,11 @@ def resample(
>>> cal = cal.map_to_data(input_data)
>>> bins = lilio.resample(cal, input_data)
>>> bins # doctest: +NORMALIZE_WHITESPACE
anchor_year i_interval interval data is_target
0 2019 -1 [2019-07-04, 2019-12-31) 14.5 False
1 2019 1 [2019-12-31, 2020-06-28) 119.5 True
2 2020 -1 [2020-07-04, 2020-12-31) 305.5 False
3 2020 1 [2020-12-31, 2021-06-29) 485.5 True
anchor_year i_interval interval data is_target
0 2019 -1 [2019-07-04, 2019-12-30] 14.5 False
1 2019 1 [2019-12-31, 2020-06-27] 119.5 True
2 2020 -1 [2020-07-04, 2020-12-30] 305.5 False
3 2020 1 [2020-12-31, 2021-06-28] 485.5 True
"""
if calendar.mapping is None:
raise ValueError("Generate a calendar map before calling resample")
Expand Down
7 changes: 3 additions & 4 deletions lilio/traintest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

Wrapper around sklearn splitters for working with (multiple) xarray dataarrays.
"""

from collections.abc import Iterable
from typing import Optional
from typing import Union
Expand Down Expand Up @@ -56,8 +57,7 @@ def split(
x_args: xr.DataArray,
y: Optional[xr.DataArray] = None,
dim: str = "anchor_year",
) -> Iterable[tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]]:
...
) -> Iterable[tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]]: ...

@overload
def split(
Expand All @@ -69,8 +69,7 @@ def split(
tuple[
Iterable[xr.DataArray], Iterable[xr.DataArray], xr.DataArray, xr.DataArray
]
]:
...
]: ...

def split(
self,
Expand Down
40 changes: 30 additions & 10 deletions lilio/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Commonly used utility functions for Lilio."""

import re
import typing
import warnings
Expand Down Expand Up @@ -107,27 +108,43 @@ def infer_input_data_freq(
Returns:
a pd.Timedelta
"""
if isinstance(data, (pd.Series, pd.DataFrame)):
if isinstance(data, (xr.DataArray, xr.Dataset)):
size = data.time.size
else:
size = data.size
if size == 1:
return pd.Timedelta("1d")

if isinstance(data, (pd.Series, pd.DataFrame)) and size >= 3:
# cannot infer when size < 3
data_freq = pd.infer_freq(data.index)
if data_freq is None: # Manually infer the frequency
data_freq = np.min(data.index.values[1:] - data.index.values[:-1])
else:
data_freq = xr.infer_freq(data.time)
if size >= 3: # cannot infer when size < 3
data_freq = xr.infer_freq(data.time)
else:
data_freq = None
if data_freq is None: # Manually infer the frequency
data_freq = (data.time.values[1:] - data.time.values[:-1]).min()

if isinstance(data_freq, str):
data_freq.replace("-", "") # Get the absolute frequency

if not re.match(r"\d+\D", data_freq): # infer_freq can return "d" for "1d".
data_freq = "1" + data_freq

# Annoying switch from the "2M" to the "2ME" format in pandas >= 2.2.
# We will need to adapt to this in the future.
if len(data_freq) in [3, 4] and data_freq[1:] in ["ME", "MS"]:
data_freq = data_freq.replace(data_freq[1:], "M")

data_freq = ( # Deal with monthly timedelta case
replace_month_length(data_freq) if data_freq[-1] == "M" else data_freq
)

data_freq = ( # Deal with yearly timedelta case
replace_year_length(data_freq) if "A" in data_freq else data_freq
replace_year_length(data_freq)
if "A" in data_freq or "Y" in data_freq
else data_freq
)

return pd.Timedelta(data_freq)
Expand Down Expand Up @@ -168,8 +185,9 @@ def check_input_frequency(
data_freq = infer_input_data_freq(data)
calendar_freq = get_smallest_calendar_freq(calendar)

if "label" in data.coords:
return
if data_freq == pd.Timedelta("365.25d") and calendar_freq == pd.Timedelta("1d"):
# Allow yearly (one-datapoint-per-year) data to be resampled to daily data.
return None

if calendar_freq < data_freq:
raise ValueError(
Expand Down Expand Up @@ -237,9 +255,11 @@ def check_reserved_names(
)
elif isinstance(input_data, (xr.DataArray, xr.Dataset)):
data_names = [
input_data.keys()
if isinstance(input_data, xr.Dataset)
else list(input_data.coords) + [input_data.name]
(
input_data.keys()
if isinstance(input_data, xr.Dataset)
else list(input_data.coords) + [input_data.name]
)
]
if any(name in data_names for name in reserved_names_xr):
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ classifiers = [
dependencies = [
"netcdf4",
"numpy",
"pandas",
"pandas < 2.2",
"matplotlib",
"xarray",
"scikit-learn",
Expand Down
Loading