Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ jobs:
main:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.12.4
cache: "pip"
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
docs/Makefile
docs/make.bat
datasets

.DS_Store/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
14 changes: 3 additions & 11 deletions configs/models_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,43 +8,35 @@
XGBRegressorConfig = (
xgb.XGBRegressor,
{
"n_estimators": [100, 200, 500],
"learning_rate": np.logspace(-2, -1, 3),
"max_depth": [3, 5, 7],
"subsample": np.linspace(0.7, 1.0, 3),
"colsample_bytree": np.linspace(0.7, 1.0, 3),
},
)

LassoConfig = (Lasso, {"alpha": np.logspace(-4, 1, 6)})
LassoConfig = (Lasso, {"alpha": np.logspace(-3, 2, 6)})

RidgeConfig = (Ridge, {"alpha": np.logspace(-1, 3, 5)})
RidgeConfig = (Ridge, {"alpha": np.logspace(-3, 2, 5)})

GradientBoostingRegressorConfig = (
GradientBoostingRegressor,
{
"n_estimators": [100, 200, 500],
"learning_rate": np.logspace(-2, -1, 3),
"max_depth": [3, 5, 7],
"subsample": np.linspace(0.7, 1.0, 3),
},
)

KNeighborsRegressorConfig = (
KNeighborsRegressor,
{
"n_neighbors": [3, 5, 10, 15],
"weights": ["uniform", "distance"],
"metric": ["euclidean", "manhattan", "minkowski"],
},
)

SVRConfig = (
SVR,
{"C": np.logspace(-1, 2, 4), "epsilon": np.linspace(0.01, 0.5, 4), "kernel": ["linear", "rbf", "poly"]},
{"C": np.logspace(-1, 2, 4), "epsilon": np.linspace(0.01, 0.5, 4)},
)


ModelsConfigs = {
"XGBRegressor": XGBRegressorConfig,
"Lasso": LassoConfig,
Expand Down
14 changes: 8 additions & 6 deletions requirements.dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
mypy~=1.10.0
black~=24.4.2
isort~=5.13.2
pytest~=7.4.4
pandas-stubs~=2.2.3.250308
hypothesis~=6.115.6
mypy>=1.10.0
black>=24.4.2
isort>=5.13.2
pytest>=7.4.4
pandas-stubs>=2.2.3.250308
hypothesis>=6.115.6
seaborn >= 0.13.2
matplotlib>=3.8.4
11 changes: 6 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
numpy~=1.26.4
scipy~=1.13.1
matplotlib~=3.8.4
scikit-learn~=1.6.1
pandas~=2.2.3
numpy>=2.1.3
scipy>=1.15.2
scikit-learn>=1.6.1
pandas>=2.2.3
xgboost >= 3.0.0
workalendar >= 17.0.0
15 changes: 3 additions & 12 deletions src/models/time_series_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def predict(self, X: pd.DataFrame) -> pd.DataFrame:
forecast_list.extend(zip(X.loc[mask, self.keys_index], X.loc[mask, self.date_index], forecast_values))
forecast_df = pd.DataFrame(forecast_list, columns=[self.keys_index, self.date_index, "Forecast"])
X = X.merge(forecast_df, on=[self.keys_index, self.date_index], how="left")
X["Forecast"] = X["Forecast"].apply(lambda x: x if x >= 0 else 0)
return X

def score(
Expand Down Expand Up @@ -265,20 +266,10 @@ def _setup_searchers(self) -> tuple[GridSearchCV, GridSearchCV]:
s_models = self._create_pipelines(self.seasonal_models)

grid_search_trend = GridSearchCV(
trend_pipe,
t_models,
cv=self.cv,
scoring=self.scoring,
verbose=0,
n_jobs=-1,
trend_pipe, t_models, cv=self.cv, scoring=self.scoring, verbose=0, n_jobs=-1, refit=True
)
grid_search_seasonal = GridSearchCV(
seasonal_pipe,
s_models,
cv=self.cv,
scoring=self.scoring,
verbose=0,
n_jobs=-1,
seasonal_pipe, s_models, cv=self.cv, scoring=self.scoring, verbose=0, n_jobs=-1, refit=True
)
return grid_search_trend, grid_search_seasonal

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.exceptions import NotFittedError
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
from workalendar.europe import Russia

Expand Down Expand Up @@ -113,3 +116,46 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
X.drop(columns=columns_to_drop, inplace=True, errors="ignore")

return X


class FeatureExtractionTransformer(BaseEstimator, TransformerMixin):
    """Bundle the feature-extraction steps into a single scikit-learn transformer.

    ``fit`` builds the pipeline returned by :meth:`features_pipeline` and fits
    it on the training frame; ``transform`` delegates to that fitted pipeline.
    """

    def __init__(self) -> None:
        # Fitted feature pipeline; remains ``None`` until ``fit`` succeeds.
        self.pipe: Optional[Pipeline] = None

    def fit(self, X: pd.DataFrame, y: Optional[pd.DataFrame] = None) -> "FeatureExtractionTransformer":
        """Build a fresh feature pipeline and fit it on ``X``.

        Args:
            X: Input frame handed to the first pipeline step.
            y: Optional target, forwarded unchanged to ``Pipeline.fit``.

        Returns:
            The transformer itself, so calls can be chained.
        """
        pipeline = self.features_pipeline()
        pipeline.fit(X, y)
        # Publish the pipeline only after fitting succeeded. Otherwise a failed
        # ``fit`` would leave a non-None but unfitted pipeline behind, and the
        # ``None`` guard in ``transform`` would no longer catch it.
        self.pipe = pipeline
        return self

    @staticmethod
    def features_pipeline() -> Pipeline:
        """Create the (unfitted) feature-extraction pipeline.

        Steps, in order: ``HolidayTransformer``, a one-hot encoding of the
        ``"holiday"`` column, ``MeanWeekMonthTransformer`` and
        ``FourierFeaturesTransformer``.

        Returns:
            A new, unfitted ``Pipeline`` instance.
        """
        # NOTE(review): the "holiday" column is presumably produced by
        # HolidayTransformer in the preceding step -- confirm.
        holiday_encoder = ColumnTransformer(
            transformers=[
                (
                    "ohe",
                    # Categories unseen during fit are ignored instead of raising.
                    OneHotEncoder(handle_unknown="ignore", sparse_output=False),
                    ["holiday"],
                )
            ],
            # Every column other than "holiday" is forwarded untouched.
            remainder="passthrough",
            verbose_feature_names_out=False,
            force_int_remainder_cols=False,
        )
        # Emit DataFrames rather than arrays so later steps keep column names.
        holiday_encoder.set_output(transform="pandas")

        return Pipeline(
            steps=[
                ("date_feature_transform", HolidayTransformer()),
                ("ohe", holiday_encoder),
                ("mean_ship_feature", MeanWeekMonthTransformer()),
                ("fourier_features", FourierFeaturesTransformer()),
            ]
        )

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Apply the fitted feature pipeline to ``X``.

        Args:
            X: Input frame to transform.

        Returns:
            The output of the fitted pipeline's ``transform``.

        Raises:
            NotFittedError: If ``fit`` has not completed successfully yet.
        """
        if self.pipe is None:
            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this estimator."
            )
        return self.pipe.transform(X)
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:


class DateRangeFilledTransformer(BaseEstimator, TransformerMixin):
def __init__(self) -> None:
self._is_fitted_: bool = False

def fit(self, X: pd.DataFrame, y: Optional[pd.DataFrame] = None) -> "DateRangeFilledTransformer":
self._is_fitted_: bool = True
self._is_fitted_ = True
return self

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,15 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

from src.special_preprocessing.transformers.features_extraction import (
FourierFeaturesTransformer,
HolidayTransformer,
MeanWeekMonthTransformer,
)
from src.special_preprocessing.transformers.preprocessing import (
from src.special_preprocessing.date_transformers.features_extraction import FeatureExtractionTransformer
from src.special_preprocessing.date_transformers.series_comp import DateRangeFilledTransformer, GroupByDateTransformer
from src.special_preprocessing.date_transformers.series_decomposition import Separation, SeriesDecompositionTransformer
from src.special_preprocessing.first_special_pipeline.preprocessing import (
ChangeTypesTransformer,
DropDuplicatesTransformer,
KeyIndexTransformer,
NaNHandlerTransformer,
)
from src.special_preprocessing.transformers.series_comp import DateRangeFilledTransformer, GroupByDateTransformer
from src.special_preprocessing.transformers.series_decomposition import Separation, SeriesDecompositionTransformer


def features() -> Pipeline:
ohe = ColumnTransformer(
transformers=[
(
"ohe",
OneHotEncoder(handle_unknown="ignore", sparse_output=False),
["holiday"],
)
],
remainder="passthrough",
verbose_feature_names_out=False,
)
ohe.set_output(transform="pandas")
pipline = Pipeline(
steps=[
("date_feature_transform", HolidayTransformer()),
("ohe", ohe),
("mean_ship_feature", MeanWeekMonthTransformer()),
(
"fourier_features",
FourierFeaturesTransformer(),
),
]
)
return pipline


def preprocessing() -> Pipeline:
Expand All @@ -56,6 +25,7 @@ def preprocessing() -> Pipeline:
],
remainder="passthrough",
verbose_feature_names_out=False,
force_int_remainder_cols=False,
)
ohe.set_output(transform="pandas")
pipline = Pipeline(
Expand All @@ -75,7 +45,7 @@ def preprocessing() -> Pipeline:
("base preprocessing", preprocessing()),
("fill_data_range", DateRangeFilledTransformer()),
("grouping", GroupByDateTransformer()),
("features extraction", features()),
("features extraction", FeatureExtractionTransformer()),
("decomposition", SeriesDecompositionTransformer()),
("separation", Separation()),
]
Expand Down
24 changes: 24 additions & 0 deletions src/special_preprocessing/second_special_pipeline/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from sklearn.pipeline import Pipeline

from src.special_preprocessing.date_transformers.features_extraction import FeatureExtractionTransformer
from src.special_preprocessing.date_transformers.series_decomposition import Separation, SeriesDecompositionTransformer
from src.special_preprocessing.second_special_pipeline.preprocessing import (
CategoricalFeaturesTransform,
DateRangeFilledTransformerSec,
DiscountTransformer,
KeyTransformer,
RenameColumns,
)

# Preprocessing pipeline for the second dataset. The stages run in the order
# listed; the last three (feature extraction, decomposition, separation) come
# from the shared ``date_transformers`` package imported above.
preprocessing_pipeline = Pipeline(
    [
        ("rename", RenameColumns()),
        ("key", KeyTransformer()),
        ("discount", DiscountTransformer()),
        ("fill_data_range", DateRangeFilledTransformerSec()),
        ("categorical_features_prep", CategoricalFeaturesTransform()),
        ("features extraction", FeatureExtractionTransformer()),
        ("decomposition", SeriesDecompositionTransformer()),
        ("separation", Separation()),
    ]
)
Loading