diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1ebda1c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.12-slim +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY . /app +ENTRYPOINT ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/app/api.py b/app/api.py new file mode 100644 index 0000000..83b2624 --- /dev/null +++ b/app/api.py @@ -0,0 +1,56 @@ +import io + +import pandas as pd +from fastapi import APIRouter, File, HTTPException, UploadFile +from fastapi.responses import StreamingResponse +from pydantic import BaseModel + +from app.model_configurator import ModelConfigurator + +router = APIRouter() +configurator = ModelConfigurator() + + +class ConfigRequest(BaseModel): + trend_models: list[str] + seasonal_models: list[str] + + +class FilePathRequest(BaseModel): + path: str + + +@router.post("/configure") +def configure(request: ConfigRequest): + try: + configurator.set_config(request.trend_models, request.seasonal_models) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + return {"message": "Configuration saved"} + + +@router.post("/train") +def train(file: UploadFile = File(...)): + df = pd.read_csv(file.file, parse_dates=["date"], index_col=False) + try: + configurator.fit_model(df) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + return {"message": f"Model training was successful with configs {configurator.config_names}"} + + +@router.post("/predict") +def predict(file: UploadFile = File(...)): + df = pd.read_csv(file.file, parse_dates=["date"], index_col=False) + try: + prediction = configurator.predict(df) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + csv_buffer = io.StringIO() + prediction.to_csv(csv_buffer, index=False) + csv_buffer.seek(0) + return StreamingResponse( + content=csv_buffer, + media_type="text/csv", 
headers={"Content-Disposition": "attachment; filename=predictions.csv"}, + ) diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..623cdf7 --- /dev/null +++ b/app/main.py @@ -0,0 +1,11 @@ +from fastapi import FastAPI + +from app.api import router + +app = FastAPI( + title="Time Series Forecasting API", + description="API for configuring, training, and predicting time series models", + version="1.0.0", +) + +app.include_router(router, prefix="/api") diff --git a/app/model_configurator.py b/app/model_configurator.py new file mode 100644 index 0000000..3ac62ce --- /dev/null +++ b/app/model_configurator.py @@ -0,0 +1,32 @@ +from typing import Optional + +import pandas as pd + +from configs.models_collector import CONFIG_TYPE, ModelsCollector +from configs.models_configs import ModelsConfigs +from src.models.time_series_model import TimeSeriesModel + + +class ModelConfigurator: + def __init__(self) -> None: + self._trend_config: Optional[CONFIG_TYPE] = None + self._seasonal_config: Optional[CONFIG_TYPE] = None + self._model: Optional[TimeSeriesModel] = None + self._collector: ModelsCollector = ModelsCollector(ModelsConfigs) + self.config_names: Optional[dict] = None + + def set_config(self, trend_models: list[str], seasonal_models: list[str]) -> None: + self._trend_config = self._collector.get_configs(trend_models) + self._seasonal_config = self._collector.get_configs(seasonal_models) + self.config_names = {"trend_models": trend_models, "seasonal_models": seasonal_models} + + def fit_model(self, X: pd.DataFrame) -> None: + if self._trend_config is None or self._seasonal_config is None: + raise ValueError("Configs are not set") + model = TimeSeriesModel(self._trend_config, self._seasonal_config) + self._model = model.fit(X) + + def predict(self, X: pd.DataFrame) -> pd.DataFrame: + if self._model is None: + raise ValueError("Model is not fitted") + return self._model.predict(X) diff --git a/configs/models_collector.py b/configs/models_collector.py index 
d59ab38..fe93ff0 100644 --- a/configs/models_collector.py +++ b/configs/models_collector.py @@ -1,4 +1,6 @@ -from typing import Any +from typing import Any, TypeAlias + +CONFIG_TYPE: TypeAlias = list[tuple[Any, list[Any]]] class ModelsCollector: @@ -23,7 +25,7 @@ def __init__(self, models_config: dict[str, tuple[Any, list[Any]]]) -> None: """ self.models_config = models_config - def get_configs(self, models_names: list[str]) -> list[tuple[Any, list[Any]]]: + def get_configs(self, models_names: list[str]) -> CONFIG_TYPE: """ Retrieves unique configurations for the specified model names. diff --git a/requirements.txt b/requirements.txt index edf8925..6fa7c0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,7 @@ xgboost >= 3.0.0 workalendar >= 17.0.0 catboost >= 1.2.8 statsmodels >= 0.14.4 +fastapi>=0.115.12 +uvicorn >= 0.32.1 +pydantic >= 2.9.2 +python-multipart>=0.0.20 diff --git a/src/special_preprocessing/date_transformers/series_comp.py b/src/special_preprocessing/date_transformers/series_comp.py index f30aa21..3cd976f 100644 --- a/src/special_preprocessing/date_transformers/series_comp.py +++ b/src/special_preprocessing/date_transformers/series_comp.py @@ -11,7 +11,7 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.DataFrame] = None) -> "GroupByDate return self def transform(self, X: pd.DataFrame) -> pd.DataFrame: - special_columns = ["discount", "price", "discount.1", "key", "date", "ship"] + special_columns = ["discount", "price", "discount.1", "key", "date", "ship"] other = [col for col in X.columns if col not in special_columns] new_data = pd.DataFrame() keys = X["key"].unique() @@ -23,6 +23,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: "ship": "sum", "discount.1": "mean", "price": "mean", + "discount": "max", } ) new_data = pd.concat([new_data, grouped], ignore_index=True) diff --git a/src/special_preprocessing/first_special_pipeline/pipeline.py 
b/src/special_preprocessing/first_special_pipeline/pipeline.py index 309ddf4..c9816f9 100644 --- a/src/special_preprocessing/first_special_pipeline/pipeline.py +++ b/src/special_preprocessing/first_special_pipeline/pipeline.py @@ -2,10 +2,10 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import OneHotEncoder -from src.special_preprocessing.date_transformers.features_extraction import FeatureExtractionTransformer -from src.special_preprocessing.date_transformers.series_comp import DateRangeFilledTransformer, GroupByDateTransformer -from src.special_preprocessing.date_transformers.series_decomposition import Separation, SeriesDecompositionTransformer -from src.special_preprocessing.first_special_pipeline.preprocessing import ( +from ..date_transformers.features_extraction import FeatureExtractionTransformer +from ..date_transformers.series_comp import DateRangeFilledTransformer, GroupByDateTransformer +from ..date_transformers.series_decomposition import Separation, SeriesDecompositionTransformer +from .preprocessing import ( ChangeTypesTransformer, DropDuplicatesTransformer, KeyIndexTransformer,