From 5afa7afc8f93c454460dc1c0e0053ac905a6703f Mon Sep 17 00:00:00 2001 From: kklein Date: Thu, 15 Aug 2024 20:24:03 +0200 Subject: [PATCH 01/10] Draft usage of same splits in all models. --- metalearners/cross_fit_estimator.py | 12 +-- metalearners/xlearner.py | 114 ++++++++++++++++++---------- 2 files changed, 81 insertions(+), 45 deletions(-) diff --git a/metalearners/cross_fit_estimator.py b/metalearners/cross_fit_estimator.py index aa112c03..66d86a07 100644 --- a/metalearners/cross_fit_estimator.py +++ b/metalearners/cross_fit_estimator.py @@ -242,12 +242,12 @@ def _predict_in_sample( ) -> np.ndarray: if not self._test_indices: raise ValueError() - if len(X) != sum(len(fold) for fold in self._test_indices): - raise ValueError( - "Trying to predict in-sample on data that is unlike data encountered in training. " - f"Training data included {sum(len(fold) for fold in self._test_indices)} " - f"observations while prediction data includes {len(X)} observations." - ) + # if len(X) != sum(len(fold) for fold in self._test_indices): + # raise ValueError( + # "Trying to predict in-sample on data that is unlike data encountered in training. " + # f"Training data included {sum(len(fold) for fold in self._test_indices)} " + # f"observations while prediction data includes {len(X)} observations." 
+ # ) n_outputs = self._n_outputs(method) predictions = self._initialize_prediction_tensor( n_observations=len(X), diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 28bee892..082917dc 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -99,31 +99,36 @@ def fit_all_nuisance( qualified_fit_params = self._qualified_fit_params(fit_params) - self._cvs: list = [] + if not synchronize_cross_fitting: + raise ValueError() + + self._cv_split_indices = self._split(X) + self._treatment_cv_split_indices = {} for treatment_variant in range(self.n_variants): self._treatment_variants_indices.append(w == treatment_variant) - if synchronize_cross_fitting: - cv_split_indices = self._split( - index_matrix(X, self._treatment_variants_indices[treatment_variant]) + treatment_indices = np.where( + self._treatment_variants_indices[treatment_variant] + )[0] + self._treatment_cv_split_indices[treatment_variant] = [ + ( + np.intersect1d(train_indices, treatment_indices), + np.intersect1d(test_indices, treatment_indices), ) - else: - cv_split_indices = None - self._cvs.append(cv_split_indices) + for train_indices, test_indices in self._cv_split_indices + ] nuisance_jobs: list[_ParallelJoblibSpecification | None] = [] for treatment_variant in range(self.n_variants): nuisance_jobs.append( self._nuisance_joblib_specifications( - X=index_matrix( - X, self._treatment_variants_indices[treatment_variant] - ), - y=y[self._treatment_variants_indices[treatment_variant]], + X=X, + y=y, model_kind=VARIANT_OUTCOME_MODEL, model_ord=treatment_variant, n_jobs_cross_fitting=n_jobs_cross_fitting, fit_params=qualified_fit_params[NUISANCE][VARIANT_OUTCOME_MODEL], - cv=self._cvs[treatment_variant], + cv=self._treatment_cv_split_indices[treatment_variant], ) ) @@ -160,13 +165,13 @@ def fit_all_treatment( ) -> Self: if self._treatment_variants_indices is None: raise ValueError( - "The nuisance models need to be fitted before fitting the treatment models." 
+ "The nuisance models need to be fitted before fitting the treatment models. " "In particular, the MetaLearner's attribute _treatment_variant_indices, " "typically set during nuisance fitting, is None." ) - if not hasattr(self, "_cvs"): + if not hasattr(self, "_treatment_cv_split_indices"): raise ValueError( - "The nuisance models need to be fitted before fitting the treatment models." + "The nuisance models need to be fitted before fitting the treatment models. " "In particular, the MetaLearner's attribute _cvs, " "typically set during nuisance fitting, does not exist." ) @@ -180,34 +185,31 @@ def fit_all_treatment( is_oos=False, ) ) - for treatment_variant in range(1, self.n_variants): imputed_te_control, imputed_te_treatment = self._pseudo_outcome( y, w, treatment_variant, conditional_average_outcome_estimates ) treatment_jobs.append( self._treatment_joblib_specifications( - X=index_matrix( - X, self._treatment_variants_indices[treatment_variant] - ), + X=X, y=imputed_te_treatment, model_kind=TREATMENT_EFFECT_MODEL, model_ord=treatment_variant - 1, n_jobs_cross_fitting=n_jobs_cross_fitting, fit_params=qualified_fit_params[TREATMENT][TREATMENT_EFFECT_MODEL], - cv=self._cvs[treatment_variant], + cv=self._treatment_cv_split_indices[treatment_variant], ) ) treatment_jobs.append( self._treatment_joblib_specifications( - X=index_matrix(X, self._treatment_variants_indices[0]), + X=X, y=imputed_te_control, model_kind=CONTROL_EFFECT_MODEL, model_ord=treatment_variant - 1, n_jobs_cross_fitting=n_jobs_cross_fitting, fit_params=qualified_fit_params[TREATMENT][CONTROL_EFFECT_MODEL], - cv=self._cvs[0], + cv=self._treatment_cv_split_indices[0], ) ) @@ -278,19 +280,18 @@ def predict( oos_method=oos_method, ) ) - tau_hat_treatment[treatment_variant_indices] = self.predict_treatment( - X=index_matrix(X, treatment_variant_indices), + X=X, model_kind=TREATMENT_EFFECT_MODEL, model_ord=treatment_variant - 1, is_oos=False, - ) + )[treatment_variant_indices] 
tau_hat_control[control_indices] = self.predict_treatment( - X=index_matrix(X, control_indices), + X=X, model_kind=CONTROL_EFFECT_MODEL, model_ord=treatment_variant - 1, is_oos=False, - ) + )[control_indices] tau_hat_control[non_control_indices] = self.predict_treatment( X=index_matrix(X, non_control_indices), model_kind=CONTROL_EFFECT_MODEL, @@ -424,16 +425,8 @@ def _pseudo_outcome( This function can be used with both in-sample or out-of-sample data. """ validate_valid_treatment_variant_not_control(treatment_variant, self.n_variants) - - treatment_indices = w == treatment_variant - control_indices = w == 0 - - treatment_outcome = index_matrix( - conditional_average_outcome_estimates, control_indices - )[:, treatment_variant] - control_outcome = index_matrix( - conditional_average_outcome_estimates, treatment_indices - )[:, 0] + treatment_outcome = conditional_average_outcome_estimates[:, treatment_variant] + control_outcome = conditional_average_outcome_estimates[:, 0] if self.is_classification: # Get the probability of positive class, multiclass is currently not supported. @@ -443,8 +436,8 @@ def _pseudo_outcome( control_outcome = control_outcome[:, 0] treatment_outcome = treatment_outcome[:, 0] - imputed_te_treatment = y[treatment_indices] - control_outcome - imputed_te_control = treatment_outcome - y[control_indices] + imputed_te_treatment = y - control_outcome + imputed_te_control = treatment_outcome - y return imputed_te_control, imputed_te_treatment @@ -534,3 +527,46 @@ def _build_onnx(self, models: Mapping[str, Sequence], output_name: str = "tau"): final_model = build(input_dict, {output_name: cate}) check_model(final_model, full_check=True) return final_model + + def predict_conditional_average_outcomes( + self, X: Matrix, is_oos: bool, oos_method: OosMethod = OVERALL + ) -> np.ndarray: + if self._treatment_variants_indices is None: + raise ValueError( + "The metalearner needs to be fitted before predicting." 
+ "In particular, the MetaLearner's attribute _treatment_variant_indices, " + "typically set during fitting, is None." + ) + # TODO: Consider multiprocessing + n_obs = len(X) + nuisance_tensors = self._nuisance_tensors(n_obs) + conditional_average_outcomes_list = [] + + for tv in range(self.n_variants): + if is_oos: + conditional_average_outcomes_list.append( + self.predict_nuisance( + X=X, + model_kind=VARIANT_OUTCOME_MODEL, + model_ord=tv, + is_oos=True, + oos_method=oos_method, + ) + ) + else: + cfe = self._nuisance_models[VARIANT_OUTCOME_MODEL][tv] + conditional_average_outcomes_list.append( + nuisance_tensors[VARIANT_OUTCOME_MODEL][0].copy() + ) + for split_index, test_indices in enumerate(cfe._test_indices): # type: ignore[arg-type] + model = cfe._estimators[split_index] + predict_method_name = self.nuisance_model_specifications()[ + VARIANT_OUTCOME_MODEL + ]["predict_method"](self) + predict_method = getattr(model, predict_method_name) + conditional_average_outcomes_list[tv][test_indices] = ( + predict_method(X[test_indices]) + ) + return np.stack(conditional_average_outcomes_list, axis=1).reshape( + n_obs, self.n_variants, -1 + ) From 413e5b0007c28df885760635865415c425d0ad4e Mon Sep 17 00:00:00 2001 From: kklein Date: Thu, 15 Aug 2024 20:40:33 +0200 Subject: [PATCH 02/10] Clean up. 
--- metalearners/xlearner.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 082917dc..bec07b5f 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -539,7 +539,10 @@ def predict_conditional_average_outcomes( ) # TODO: Consider multiprocessing n_obs = len(X) - nuisance_tensors = self._nuisance_tensors(n_obs) + cao_tensor = self._nuisance_tensors(n_obs)[VARIANT_OUTCOME_MODEL][0] + predict_method_name = self.nuisance_model_specifications()[ + VARIANT_OUTCOME_MODEL + ]["predict_method"](self) conditional_average_outcomes_list = [] for tv in range(self.n_variants): @@ -554,19 +557,22 @@ def predict_conditional_average_outcomes( ) ) else: + # TODO: Consider moving this logic to CrossFitEstimator.predict. cfe = self._nuisance_models[VARIANT_OUTCOME_MODEL][tv] + conditional_average_outcome_estimates = cao_tensor.copy() + + for fold_index, test_indices in zip( + range(cfe.n_folds), cfe._test_indices # type: ignore[arg-type] + ): + fold_model = cfe._estimators[fold_index] + predict_method = getattr(fold_model, predict_method_name) + fold_estimates = predict_method(X[test_indices]) + conditional_average_outcome_estimates[test_indices] = fold_estimates + conditional_average_outcomes_list.append( - nuisance_tensors[VARIANT_OUTCOME_MODEL][0].copy() + conditional_average_outcome_estimates ) - for split_index, test_indices in enumerate(cfe._test_indices): # type: ignore[arg-type] - model = cfe._estimators[split_index] - predict_method_name = self.nuisance_model_specifications()[ - VARIANT_OUTCOME_MODEL - ]["predict_method"](self) - predict_method = getattr(model, predict_method_name) - conditional_average_outcomes_list[tv][test_indices] = ( - predict_method(X[test_indices]) - ) + return np.stack(conditional_average_outcomes_list, axis=1).reshape( n_obs, self.n_variants, -1 ) From 59554b11e76a145a965d00609a828c830ff90c03 Mon Sep 17 00:00:00 2001 From: 
kklein Date: Thu, 15 Aug 2024 20:46:14 +0200 Subject: [PATCH 03/10] Fix attribute reference. --- metalearners/xlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index bec07b5f..fa95edd9 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -172,7 +172,7 @@ def fit_all_treatment( if not hasattr(self, "_treatment_cv_split_indices"): raise ValueError( "The nuisance models need to be fitted before fitting the treatment models. " - "In particular, the MetaLearner's attribute _cvs, " + "In particular, the MetaLearner's attribute _treatment_cv_split_indices, " "typically set during nuisance fitting, does not exist." ) qualified_fit_params = self._qualified_fit_params(fit_params) From c0bdcbd6a263906523e602549cfe7b2ae1f89cba Mon Sep 17 00:00:00 2001 From: kklein Date: Thu, 15 Aug 2024 21:02:36 +0200 Subject: [PATCH 04/10] Filter properly. --- metalearners/xlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index fa95edd9..fd767194 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -566,7 +566,7 @@ def predict_conditional_average_outcomes( ): fold_model = cfe._estimators[fold_index] predict_method = getattr(fold_model, predict_method_name) - fold_estimates = predict_method(X[test_indices]) + fold_estimates = predict_method(index_matrix(X, test_indices)) conditional_average_outcome_estimates[test_indices] = fold_estimates conditional_average_outcomes_list.append( From fe16b752b7acb4a817af4a734cb93a92bfd7a12f Mon Sep 17 00:00:00 2001 From: kklein Date: Thu, 15 Aug 2024 21:26:23 +0200 Subject: [PATCH 05/10] Fix out-of-sample evaluate. 
--- metalearners/xlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index fd767194..e15c842d 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -374,8 +374,8 @@ def evaluate( tv_imputed_te_control, tv_imputed_te_treatment = self._pseudo_outcome( y, w, treatment_variant, conditional_average_outcome_estimates ) - imputed_te_control.append(tv_imputed_te_control) - imputed_te_treatment.append(tv_imputed_te_treatment) + imputed_te_control.append(tv_imputed_te_control[w == 0]) + imputed_te_treatment.append(tv_imputed_te_treatment[w == treatment_variant]) te_treatment_evaluation = _evaluate_model_kind( self._treatment_models[TREATMENT_EFFECT_MODEL], From 410e9e7f6449b3d928d17ffefaf17c6918d1f4bc Mon Sep 17 00:00:00 2001 From: kklein Date: Thu, 15 Aug 2024 21:44:38 +0200 Subject: [PATCH 06/10] Fix in-sample evaluate. --- metalearners/xlearner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index e15c842d..8962e566 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -338,8 +338,8 @@ def evaluate( variant_outcome_evaluation = _evaluate_model_kind( cfes=self._nuisance_models[VARIANT_OUTCOME_MODEL], - Xs=[X[w == tv] for tv in range(self.n_variants)], - ys=[y[w == tv] for tv in range(self.n_variants)], + Xs=[X] * self.n_variants, + ys=[y] * self.n_variants, scorers=safe_scoring[VARIANT_OUTCOME_MODEL], model_kind=VARIANT_OUTCOME_MODEL, is_oos=is_oos, @@ -374,12 +374,12 @@ def evaluate( tv_imputed_te_control, tv_imputed_te_treatment = self._pseudo_outcome( y, w, treatment_variant, conditional_average_outcome_estimates ) - imputed_te_control.append(tv_imputed_te_control[w == 0]) - imputed_te_treatment.append(tv_imputed_te_treatment[w == treatment_variant]) + imputed_te_control.append(tv_imputed_te_control) + imputed_te_treatment.append(tv_imputed_te_treatment) 
te_treatment_evaluation = _evaluate_model_kind( self._treatment_models[TREATMENT_EFFECT_MODEL], - Xs=[X[w == tv] for tv in range(1, self.n_variants)], + Xs=[X] * self.n_variants, ys=imputed_te_treatment, scorers=safe_scoring[TREATMENT_EFFECT_MODEL], model_kind=TREATMENT_EFFECT_MODEL, @@ -391,7 +391,7 @@ def evaluate( te_control_evaluation = _evaluate_model_kind( self._treatment_models[CONTROL_EFFECT_MODEL], - Xs=[X[w == 0] for _ in range(1, self.n_variants)], + Xs=[X] * self.n_variants, ys=imputed_te_control, scorers=safe_scoring[CONTROL_EFFECT_MODEL], model_kind=CONTROL_EFFECT_MODEL, From 6a43c9cd562e5766c4c32462c7171bab72b7ec56 Mon Sep 17 00:00:00 2001 From: kklein Date: Thu, 15 Aug 2024 22:13:24 +0200 Subject: [PATCH 07/10] Adapt synchronization-related tests. --- metalearners/xlearner.py | 5 ++++- tests/test_metalearner.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 8962e566..40464a20 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -99,8 +99,11 @@ def fit_all_nuisance( qualified_fit_params = self._qualified_fit_params(fit_params) + # TODO: Move this to object initialization. if not synchronize_cross_fitting: - raise ValueError() + raise ValueError( + "The X-Learner does not support synchronize_cross_fitting=False." + ) self._cv_split_indices = self._split(X) self._treatment_cv_split_indices = {} diff --git a/tests/test_metalearner.py b/tests/test_metalearner.py index d9ac1f68..e0907d1e 100644 --- a/tests/test_metalearner.py +++ b/tests/test_metalearner.py @@ -727,9 +727,17 @@ def test_fit_params(metalearner_factory, fit_params, expected_keys, dummy_datase is_classification=False, n_folds=1, ) - # Using cross-fitting is not possible with a single fold. + if metalearner_factory == XLearner: + # TODO: The X-Learner doesn't support using synchronize_cross_fitting=False. + # As a consequence, it doesn't support n_folds=1 either. 
+ # We should find an alternative to testing this property for the X-Learner. + pytest.skip() metalearner.fit( - X=X, y=y, w=w, fit_params=fit_params, synchronize_cross_fitting=False + X=X, + y=y, + w=w, + fit_params=fit_params, + synchronize_cross_fitting=False, ) @@ -994,9 +1002,9 @@ def test_shap_values_smoke( [ TLearner, SLearner, - XLearner, RLearner, DRLearner, + # The X-Learner does not support synchronize_cross_fitting = False. ], ) @pytest.mark.parametrize("n_variants", [2, 5]) From bbfff1504fe58ffe2d7c7d03a631a7bb9d33b4c7 Mon Sep 17 00:00:00 2001 From: kklein Date: Thu, 15 Aug 2024 23:39:40 +0200 Subject: [PATCH 08/10] Fix cao estimation only taking place for seen variant. --- metalearners/xlearner.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 40464a20..132d97fe 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -192,6 +192,7 @@ def fit_all_treatment( imputed_te_control, imputed_te_treatment = self._pseudo_outcome( y, w, treatment_variant, conditional_average_outcome_estimates ) + treatment_jobs.append( self._treatment_joblib_specifications( X=X, y=imputed_te_treatment, model_kind=TREATMENT_EFFECT_MODEL, model_ord=treatment_variant - 1, n_jobs_cross_fitting=n_jobs_cross_fitting, fit_params=qualified_fit_params[TREATMENT][TREATMENT_EFFECT_MODEL], cv=self._treatment_cv_split_indices[treatment_variant], ) ) @@ -221,6 +222,7 @@ def fit_all_treatment( delayed(_fit_cross_fit_estimator_joblib)(job) for job in treatment_jobs ) self._assign_joblib_treatment_results(results) + return self def predict( @@ -564,13 +566,15 @@ def predict_conditional_average_outcomes( cfe = self._nuisance_models[VARIANT_OUTCOME_MODEL][tv] conditional_average_outcome_estimates = cao_tensor.copy() - for fold_index, test_indices in zip( - range(cfe.n_folds), cfe._test_indices # type: ignore[arg-type] + for fold_index, (train_indices, prediction_indices) in enumerate( + self._cv_split_indices ): fold_model = cfe._estimators[fold_index] predict_method = getattr(fold_model, predict_method_name) - fold_estimates = predict_method(index_matrix(X, test_indices)) - conditional_average_outcome_estimates[test_indices] = fold_estimates +
fold_estimates = predict_method(index_matrix(X, prediction_indices)) + conditional_average_outcome_estimates[prediction_indices] = ( + fold_estimates + ) conditional_average_outcomes_list.append( conditional_average_outcome_estimates From 68030969e302129e9bc713cf67eb302135d94c7e Mon Sep 17 00:00:00 2001 From: Kevin Klein <7267523+kklein@users.noreply.github.com> Date: Fri, 16 Aug 2024 12:04:48 +0200 Subject: [PATCH 09/10] Update metalearners/xlearner.py Co-authored-by: Matthias Loeffler <106818324+MatthiasLoefflerQC@users.noreply.github.com> --- metalearners/xlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 132d97fe..71b225ca 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -557,7 +557,7 @@ def predict_conditional_average_outcomes( X=X, model_kind=VARIANT_OUTCOME_MODEL, model_ord=tv, - is_oos=True, + is_oos=is_oos, oos_method=oos_method, ) ) From b005eb70b9e9f1991028dd75c0b708c010654c77 Mon Sep 17 00:00:00 2001 From: kklein Date: Fri, 16 Aug 2024 13:48:58 +0200 Subject: [PATCH 10/10] Add type hints for cv-split-related attributes. --- metalearners/xlearner.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/metalearners/xlearner.py b/metalearners/xlearner.py index 71b225ca..321cd940 100644 --- a/metalearners/xlearner.py +++ b/metalearners/xlearner.py @@ -8,7 +8,14 @@ from joblib import Parallel, delayed from typing_extensions import Self -from metalearners._typing import Matrix, OosMethod, Scoring, Vector, _ScikitModel +from metalearners._typing import ( + Matrix, + OosMethod, + Scoring, + SplitIndices, + Vector, + _ScikitModel, +) from metalearners._utils import ( check_spox_installed, copydoc, @@ -105,8 +112,8 @@ def fit_all_nuisance( "The X-Learner does not support synchronize_cross_fitting=False." 
) - self._cv_split_indices = self._split(X) - self._treatment_cv_split_indices = {} + self._cv_split_indices: SplitIndices = self._split(X) + self._treatment_cv_split_indices: dict[int, SplitIndices] = {} for treatment_variant in range(self.n_variants): self._treatment_variants_indices.append(w == treatment_variant)