-
Notifications
You must be signed in to change notification settings - Fork 0
feat: Add XGBoost model implementation #23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -62,6 +62,12 @@ eda = [ | |||||||||||
| "pyarrow" | ||||||||||||
| ] | ||||||||||||
|
|
||||||||||||
| models = [ | ||||||||||||
| "xgboost", | ||||||||||||
| "pandas", | ||||||||||||
| "numpy", | ||||||||||||
| ] | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
|
|
||||||||||||
|
Comment on lines
+67
to
72
|
||||||||||||
| "pandas", | |
| "numpy", | |
| ] | |
| "numpy", | |
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| from .model_abstract import ModelAbstract | ||
| from .model_xgboost import ModelXgboost | ||
|
|
||
| __all__ = ["ModelAbstract", "ModelXgboost"] | ||
|
Comment on lines
+1
to
+4
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,24 @@ | ||||||||||||||||||||||||||||||||||||||||||||||
| from abc import ABC, abstractmethod | ||||||||||||||||||||||||||||||||||||||||||||||
| from typing import Any | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| import pandas as pd | ||||||||||||||||||||||||||||||||||||||||||||||
| import xgboost as xgb | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| class ModelAbstract(ABC): | ||||||||||||||||||||||||||||||||||||||||||||||
| """Abstract class for all models""" | ||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||
| @abstractmethod | ||||||||||||||||||||||||||||||||||||||||||||||
| def train(self, *_args: Any, **_kwargs: Any) -> xgb.Booster: | ||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||
| def train(self, *_args: Any, **_kwargs: Any) -> xgb.Booster: | |
| def train( | |
| self, | |
| *args: Any, | |
| params: Any = None, | |
| num_boost_round: Any = None, | |
| **kwargs: Any, | |
| ) -> xgb.Booster: |
Copilot
AI
Feb 8, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ModelAbstract imports xgboost and returns xgb.Booster, which makes the abstract base class (and any import of alphapulse.models) require the optional models extra. This will break uv run mypy / uv run pytest in the default dev environment where xgboost isn’t installed. Consider removing the hard dependency from the abstract layer (e.g., return Any/a protocol, or use a TYPE_CHECKING import and a forward reference) so the package can be imported/type-checked without the optional extra.
Copilot
AI
Feb 8, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overridden method signature does not match call, where it is passed an argument named 'params'. Overriding method method ModelXgboost.finetune matches the call.
Overridden method signature does not match call, where it is passed an argument named 'num_boost_round'. Overriding method method ModelXgboost.finetune matches the call.
| def finetune(self, *_args: Any, **_kwargs: Any) -> xgb.Booster: | |
| def finetune( | |
| self, | |
| params: Any = None, | |
| num_boost_round: int | None = None, | |
| *_args: Any, | |
| **_kwargs: Any, | |
| ) -> xgb.Booster: |
Copilot
AI
Feb 8, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typos in error text: “overriden” should be “overridden” in these NotImplementedError messages.
| raise NotImplementedError("Train method needs to be overriden") | |
| @abstractmethod | |
| def finetune(self, *_args: Any, **_kwargs: Any) -> xgb.Booster: | |
| """Finetune the trained model""" | |
| raise NotImplementedError("Finetune method needs to be overriden") | |
| @abstractmethod | |
| def predict(self, *_args: Any, **_kwargs: Any) -> pd.Series: | |
| """Predict the result of the trained model""" | |
| raise NotImplementedError("Predict method needs to be overriden") | |
| raise NotImplementedError("Train method needs to be overridden") | |
| @abstractmethod | |
| def finetune(self, *_args: Any, **_kwargs: Any) -> xgb.Booster: | |
| """Finetune the trained model""" | |
| raise NotImplementedError("Finetune method needs to be overridden") | |
| @abstractmethod | |
| def predict(self, *_args: Any, **_kwargs: Any) -> pd.Series: | |
| """Predict the result of the trained model""" | |
| raise NotImplementedError("Predict method needs to be overridden") |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,57 @@ | ||||||||||||
| from collections.abc import Mapping | ||||||||||||
| from typing import Any | ||||||||||||
|
|
||||||||||||
| import pandas as pd | ||||||||||||
| import xgboost as xgb | ||||||||||||
|
|
||||||||||||
| from .model_abstract import ModelAbstract | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| class ModelXgboost(ModelAbstract): | ||||||||||||
| def __init__(self) -> None: | ||||||||||||
| self.model: xgb.Booster | None = None | ||||||||||||
|
|
||||||||||||
| def train( | ||||||||||||
| self, | ||||||||||||
| X: pd.DataFrame, | ||||||||||||
| y: pd.Series, | ||||||||||||
| params: Mapping[str, Any], | ||||||||||||
| num_boost_round: int = 10, | ||||||||||||
| **kwargs: Any, | ||||||||||||
| ) -> xgb.Booster: | ||||||||||||
| dtrain = xgb.DMatrix(X, label=y) | ||||||||||||
|
|
||||||||||||
| self.model = xgb.train( | ||||||||||||
| params=params, dtrain=dtrain, num_boost_round=num_boost_round, **kwargs | ||||||||||||
| ) | ||||||||||||
| return self.model | ||||||||||||
|
|
||||||||||||
| def finetune( | ||||||||||||
| self, | ||||||||||||
| X: pd.DataFrame, | ||||||||||||
| y: pd.Series, | ||||||||||||
| params: Mapping[str, Any], | ||||||||||||
| num_boost_round: int = 10, | ||||||||||||
| **kwargs: Any, | ||||||||||||
| ) -> xgb.Booster: | ||||||||||||
| if self.model is None: | ||||||||||||
| raise RuntimeError("Train initial model") | ||||||||||||
|
||||||||||||
| raise RuntimeError("Train initial model") | |
| raise RuntimeError( | |
| "ModelXgboost.finetune() requires an initial model. Call " | |
| "ModelXgboost.train() before finetune()." | |
| ) |
Copilot
AI
Feb 8, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similarly, this error message would be more actionable if it told callers exactly what to do (e.g., call train() before predict()) and/or included context (model name/method).
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,108 @@ | ||||||||
| import json | ||||||||
| from pathlib import Path | ||||||||
| from typing import Any | ||||||||
|
|
||||||||
| import numpy as np | ||||||||
| import pandas as pd | ||||||||
| import pytest | ||||||||
|
|
||||||||
| from alphapulse.models.model_xgboost import ModelXgboost | ||||||||
|
|
||||||||
| ROOT = Path(__file__).parent.parent.parent | ||||||||
| TRAIN_DATA_PATH = ROOT / "data" / "v5.2" / "train.parquet" | ||||||||
| FEATURES_JSON_PATH = ROOT / "data" / "v5.2" / "features.json" | ||||||||
| TEST_DATA_PATH = ROOT / "data" / "v5.2" / "live.parquet" | ||||||||
|
|
||||||||
|
|
||||||||
| @pytest.fixture | ||||||||
| def test_data() -> tuple[pd.DataFrame, list[str]]: | ||||||||
| """Load Numerai data""" | ||||||||
| with open(FEATURES_JSON_PATH, encoding="utf-8") as f: | ||||||||
| feature_metadata = json.load(f) | ||||||||
| feature_cols = feature_metadata["feature_sets"]["small"] | ||||||||
| target_cols = feature_metadata["targets"] | ||||||||
| train = pd.read_parquet( | ||||||||
| TRAIN_DATA_PATH, columns=["era"] + feature_cols + target_cols | ||||||||
| ) | ||||||||
| return train, feature_cols | ||||||||
|
Comment on lines
+11
to
+27
|
||||||||
|
|
||||||||
|
|
||||||||
| @pytest.fixture | ||||||||
| def xgb_params() -> dict[str, Any]: | ||||||||
| return { | ||||||||
| "learning_rate": 0.1, | ||||||||
| "max_depth": 6, | ||||||||
| "min_child_weight": 1, | ||||||||
| "gamma": 0, | ||||||||
| "subsample": 0.8, | ||||||||
| "colsample_bytree": 0.8, | ||||||||
| "lambda": 1, | ||||||||
| "alpha": 0, | ||||||||
|
||||||||
| "alpha": 0, | |
| "alpha": 0, | |
| "objective": "binary:logistic", |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The standard dev workflow (
uv sync --extra dev/uv run mypy/uv run pytest) will not install the newmodelsextra, but the code and tests unconditionally importxgboost. To keep CI/local dev green, either addxgboostto thedevextra (or core deps), or make thexgboostimport/tests conditional on the extra being installed.