6 changes: 6 additions & 0 deletions pyproject.toml
@@ -62,6 +62,12 @@ eda = [
"pyarrow"
]

models = [
"xgboost",
"pandas",
"numpy",
]
Comment on lines +65 to +69
Copilot AI Feb 8, 2026
The standard dev workflow (uv sync --extra dev / uv run mypy / uv run pytest) will not install the new models extra, but the code and tests unconditionally import xgboost. To keep CI/local dev green, either add xgboost to the dev extra (or core deps), or make the xgboost import/tests conditional on the extra being installed.

Copilot uses AI. Check for mistakes.
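One sketch of the conditional route the comment describes: gate on whether the optional dependency is importable at all. `importlib.util.find_spec` is stdlib; the flag name `HAS_XGBOOST` is our own, not something in this repo.

```python
from importlib import util

# True only when the optional "models" extra (and thus xgboost) is
# installed; tests can key pytest.mark.skipif off this flag.
HAS_XGBOOST = util.find_spec("xgboost") is not None

if HAS_XGBOOST:
    import xgboost as xgb  # safe: only runs when the extra is present
```

Inside the test modules themselves, `pytest.importorskip("xgboost")` at the top of the file achieves the same effect with less ceremony.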



Comment on lines +67 to 72
Copilot AI Feb 8, 2026

The new models extra includes pandas, but pandas is already a core dependency. Keeping duplicates makes dependency intent unclear; consider limiting this extra to only what’s actually optional (likely just xgboost, and possibly numpy if not already required elsewhere).

Suggested change
-    "pandas",
     "numpy",
 ]

[project.urls]
4 changes: 4 additions & 0 deletions src/alphapulse/models/__init__.py
@@ -0,0 +1,4 @@
from .model_abstract import ModelAbstract
from .model_xgboost import ModelXgboost

__all__ = ["ModelAbstract", "ModelXgboost"]
Comment on lines +1 to +4
Copilot AI Feb 8, 2026

This package __init__ eagerly imports ModelXgboost, which will raise ModuleNotFoundError for users who install the base package without the models extra (because xgboost is optional). Consider making alphapulse.models safe to import without xgboost (lazy/conditional import, or avoid exporting ModelXgboost at package import time).

24 changes: 24 additions & 0 deletions src/alphapulse/models/model_abstract.py
@@ -0,0 +1,24 @@
from abc import ABC, abstractmethod
from typing import Any

import pandas as pd
import xgboost as xgb


class ModelAbstract(ABC):
"""Abstract class for all models"""

@abstractmethod
def train(self, *_args: Any, **_kwargs: Any) -> xgb.Booster:
Copilot AI Feb 8, 2026

Overridden method signature does not match the call sites, which pass arguments named 'params' and 'num_boost_round'. The overriding method ModelXgboost.train matches those calls.

Suggested change
-    def train(self, *_args: Any, **_kwargs: Any) -> xgb.Booster:
+    def train(
+        self,
+        *args: Any,
+        params: Any = None,
+        num_boost_round: Any = None,
+        **kwargs: Any,
+    ) -> xgb.Booster:

"""Initial training the model"""
Comment on lines +4 to +13
Copilot AI Feb 8, 2026

ModelAbstract imports xgboost and returns xgb.Booster, which makes the abstract base class (and any import of alphapulse.models) require the optional models extra. This will break uv run mypy / uv run pytest in the default dev environment where xgboost isn’t installed. Consider removing the hard dependency from the abstract layer (e.g., return Any/a protocol, or use a TYPE_CHECKING import and a forward reference) so the package can be imported/type-checked without the optional extra.

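A sketch of the TYPE_CHECKING route for model_abstract.py: with `from __future__ import annotations`, the `xgb.Booster` annotation is never evaluated at runtime, so the module imports cleanly without xgboost installed.

```python
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Seen only by type checkers; never executed at runtime.
    import xgboost as xgb


class ModelAbstract(ABC):
    """Abstract class for all models, importable without xgboost."""

    @abstractmethod
    def train(self, *args: Any, **kwargs: Any) -> xgb.Booster:
        """Initial training of the model."""
        raise NotImplementedError("train() must be overridden")
```

mypy still sees the precise `xgb.Booster` return type, while the default dev environment (no `models` extra) can import and collect tests against the package.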
raise NotImplementedError("Train method needs to be overriden")

@abstractmethod
def finetune(self, *_args: Any, **_kwargs: Any) -> xgb.Booster:
Copilot AI Feb 8, 2026
Overridden method signature does not match call, where it is passed an argument named 'params'. Overriding method method ModelXgboost.finetune matches the call.
Overridden method signature does not match call, where it is passed an argument named 'num_boost_round'. Overriding method method ModelXgboost.finetune matches the call.

Suggested change
-    def finetune(self, *_args: Any, **_kwargs: Any) -> xgb.Booster:
+    def finetune(
+        self,
+        params: Any = None,
+        num_boost_round: int | None = None,
+        *_args: Any,
+        **_kwargs: Any,
+    ) -> xgb.Booster:

"""Finetune the trained model"""
raise NotImplementedError("Finetune method needs to be overriden")

@abstractmethod
def predict(self, *_args: Any, **_kwargs: Any) -> pd.Series:
"""Predict the result of the trained model"""
raise NotImplementedError("Predict method needs to be overriden")
Comment on lines +14 to +24
Copilot AI Feb 8, 2026

Typos in error text: “overriden” should be “overridden” in these NotImplementedError messages.

Suggested change
-        raise NotImplementedError("Train method needs to be overriden")
+        raise NotImplementedError("Train method needs to be overridden")
-        raise NotImplementedError("Finetune method needs to be overriden")
+        raise NotImplementedError("Finetune method needs to be overridden")
-        raise NotImplementedError("Predict method needs to be overriden")
+        raise NotImplementedError("Predict method needs to be overridden")

57 changes: 57 additions & 0 deletions src/alphapulse/models/model_xgboost.py
@@ -0,0 +1,57 @@
from collections.abc import Mapping
from typing import Any

import pandas as pd
import xgboost as xgb

from .model_abstract import ModelAbstract


class ModelXgboost(ModelAbstract):
def __init__(self) -> None:
self.model: xgb.Booster | None = None

def train(
self,
X: pd.DataFrame,
y: pd.Series,
params: Mapping[str, Any],
num_boost_round: int = 10,
**kwargs: Any,
) -> xgb.Booster:
dtrain = xgb.DMatrix(X, label=y)

self.model = xgb.train(
params=params, dtrain=dtrain, num_boost_round=num_boost_round, **kwargs
)
return self.model

def finetune(
self,
X: pd.DataFrame,
y: pd.Series,
params: Mapping[str, Any],
num_boost_round: int = 10,
**kwargs: Any,
) -> xgb.Booster:
if self.model is None:
raise RuntimeError("Train initial model")
Copilot AI Feb 8, 2026
The runtime error message here is ambiguous (“Train initial model”). Consider making it actionable, e.g., explicitly telling callers to call train() before finetune(), and ideally include the class/method name in the message for easier debugging.

Suggested change
-            raise RuntimeError("Train initial model")
+            raise RuntimeError(
+                "ModelXgboost.finetune() requires an initial model. Call "
+                "ModelXgboost.train() before finetune()."
+            )


dtrain = xgb.DMatrix(X, label=y)
self.model = xgb.train(
params=params,
dtrain=dtrain,
num_boost_round=num_boost_round,
xgb_model=self.model,
**kwargs,
)
return self.model

def predict(self, X: pd.DataFrame, **kwargs: Any) -> pd.Series:
if self.model is None:
raise RuntimeError("Train a model first")
Comment on lines +51 to +52
Copilot AI Feb 8, 2026

Similarly, this error message would be more actionable if it told callers exactly what to do (e.g., call train() before predict()) and/or included context (model name/method).


dtest = xgb.DMatrix(X)

preds = self.model.predict(dtest, **kwargs)
return pd.Series(preds, index=X.index, name="prediction")
Empty file added tests/models/__init__.py
108 changes: 108 additions & 0 deletions tests/models/test_models_xgboost.py
@@ -0,0 +1,108 @@
import json
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd
import pytest

from alphapulse.models.model_xgboost import ModelXgboost

ROOT = Path(__file__).parent.parent.parent
TRAIN_DATA_PATH = ROOT / "data" / "v5.2" / "train.parquet"
FEATURES_JSON_PATH = ROOT / "data" / "v5.2" / "features.json"
TEST_DATA_PATH = ROOT / "data" / "v5.2" / "live.parquet"


@pytest.fixture
def test_data() -> tuple[pd.DataFrame, list[str]]:
"""Load Numerai data"""
with open(FEATURES_JSON_PATH, encoding="utf-8") as f:
feature_metadata = json.load(f)
feature_cols = feature_metadata["feature_sets"]["small"]
target_cols = feature_metadata["targets"]
train = pd.read_parquet(
TRAIN_DATA_PATH, columns=["era"] + feature_cols + target_cols
)
return train, feature_cols
Comment on lines +11 to +27
Copilot AI Feb 8, 2026
These tests depend on local Numerai dataset files under data/v5.2/*, but the repository doesn’t include a data/ directory. As written, the test suite will fail on a clean checkout/CI. Consider replacing this with a small synthetic DataFrame fixture (or add minimal test fixtures to tests/ and load them from there).

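A sketch of the synthetic-fixture route: the function name, column names, and sizes below are illustrative; only the era/feature/target layout mirrors the real fixture.

```python
import numpy as np
import pandas as pd


def make_synthetic_data(
    n_rows: int = 200, n_features: int = 5, seed: int = 0
) -> tuple[pd.DataFrame, list[str]]:
    """Build a tiny frame with the Numerai-style layout: an 'era'
    column, feature columns, and a {0, 1} 'target' column."""
    rng = np.random.default_rng(seed)
    feature_cols = [f"feature_{i}" for i in range(n_features)]
    frame = pd.DataFrame(
        rng.random((n_rows, n_features)), columns=feature_cols
    )
    frame["era"] = (np.arange(n_rows) // 20).astype(str)  # 20 rows per era
    frame["target"] = rng.integers(0, 2, size=n_rows)
    return frame, feature_cols
```

Wrapped in a `@pytest.fixture`, this could replace the parquet-backed `test_data` fixture without touching the test assertions, and keeps the suite green on a clean checkout.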


@pytest.fixture
def xgb_params() -> dict[str, Any]:
return {
"learning_rate": 0.1,
"max_depth": 6,
"min_child_weight": 1,
"gamma": 0,
"subsample": 0.8,
"colsample_bytree": 0.8,
"lambda": 1,
"alpha": 0,
Copilot AI Feb 8, 2026
xgb_params does not set an objective, but the test asserts predictions are in [0, 1]. With XGBoost defaults (regression), predictions are not guaranteed to be bounded, so this assertion can be flaky/incorrect. Either set an objective that guarantees bounds (e.g., logistic) or relax the assertion to properties that always hold (shape, finite values, etc.).

Suggested change
         "alpha": 0,
+        "objective": "binary:logistic",
}


def test_train_creates_model(
test_data: tuple[pd.DataFrame, list[str]], xgb_params: dict[str, Any]
) -> None:
"""Checks if model was created"""
train, feature_cols = test_data

model = ModelXgboost()
booster = model.train(
train[feature_cols],
train["target"],
params=xgb_params,
num_boost_round=10,
)

assert booster is not None
assert model.model is booster


def test_finetune_updates_model(
test_data: tuple[pd.DataFrame, list[str]], xgb_params: dict[str, Any]
) -> None:
"""Check if finetuning actually changes the model"""
train, feature_cols = test_data

model = ModelXgboost()

booster_before = model.train(
train[feature_cols],
train["target"],
params=xgb_params,
num_boost_round=5,
)

booster_after = model.finetune(
train[feature_cols],
train["target"],
params=xgb_params,
num_boost_round=5,
)

assert booster_after is not None
assert booster_after is not booster_before


def test_predict_output_shape_and_range(
test_data: tuple[pd.DataFrame, list[str]], xgb_params: dict[str, Any]
) -> None:
"""Checks if the number of predictions is equal to the number of test samples
and if each prediction is in [0,1]
"""
train, feature_cols = test_data
test = pd.read_parquet(TEST_DATA_PATH, columns=feature_cols)
model = ModelXgboost()
model.train(
train[feature_cols],
train["target"],
params=xgb_params,
num_boost_round=10,
)

preds = model.predict(test)

assert preds.shape[0] == test.shape[0]
assert np.all(preds >= 0.0)
assert np.all(preds <= 1.0)
37 changes: 36 additions & 1 deletion uv.lock
