diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b4574038c..b77cfd38c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -23,45 +23,51 @@ jobs:
   test:
     name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }})
     runs-on: ${{ matrix.os }}
+
     strategy:
+      fail-fast: false
       matrix:
-        python-version: ["3.11"]
-        scikit-learn: ["1.3.*", "1.4.*", "1.5.*"]
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"]
         os: [ubuntu-latest]
         sklearn-only: ["true"]
-      fail-fast:  false
+
+        exclude:
+          # incompatible version combinations
+          - python-version: "3.13"
+            scikit-learn: "1.3.*"
+          - python-version: "3.13"
+            scikit-learn: "1.4.*"
+
+        include:
+          # Full test run on Windows
+          - os: windows-latest
+            python-version: "3.12"
+            scikit-learn: "1.5.*"
+            sklearn-only: "false"
+
+          # Coverage run
+          - os: ubuntu-latest
+            python-version: "3.12"
+            scikit-learn: "1.5.*"
+            sklearn-only: "false"
+            code-cov: true
 
     steps:
     - uses: actions/checkout@v6
       with:
         fetch-depth: 2
+
     - name: Setup Python ${{ matrix.python-version }}
-      if: matrix.os != 'windows-latest'  # windows-latest only uses preinstalled Python (3.9.13)
       uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install test dependencies
+
+    - name: Install test dependencies and scikit-learn
       run: |
         python -m pip install --upgrade pip
-        pip install -e .[test]
-    - name: Install scikit-learn ${{ matrix.scikit-learn }}
-      run: |
-        pip install scikit-learn==${{ matrix.scikit-learn }}
-    - name: Install numpy for Python 3.8
-      # Python 3.8 & scikit-learn<0.24 requires numpy<=1.23.5
-      if: ${{ matrix.python-version == '3.8' && matrix.scikit-learn == '0.23.1' }}
-      run: |
-        pip install numpy==1.23.5
-    - name: "Install NumPy 1.x and SciPy <1.11 for scikit-learn < 1.4"
-      if: ${{ contains(fromJSON('["1.0.*", "1.1.*", "1.2.*", "1.3.*"]'), matrix.scikit-learn) }}
-      run: |
-        # scipy has a change to the 'mode' behavior which breaks scikit-learn < 1.4
-        # numpy 2.0 has several breaking changes
-        pip install "numpy<2.0" "scipy<1.11"
-    - name: Install scipy ${{ matrix.scipy }}
-      if: ${{ matrix.scipy }}
-      run: |
-        pip install scipy==${{ matrix.scipy }}
+        pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }}
+
     - name: Store repository status
       id: status-before
       if: matrix.os != 'windows-latest'
@@ -69,28 +75,45 @@ jobs:
         git_status=$(git status --porcelain -b)
         echo "BEFORE=$git_status" >> $GITHUB_ENV
         echo "Repository status before tests: $git_status"
+
     - name: Show installed dependencies
       run: python -m pip list
+
     - name: Run tests on Ubuntu Test
       if: matrix.os == 'ubuntu-latest'
       run: |
-        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long  --cov-report=xml'; fi
-        # Most of the time, running only the scikit-learn tests is sufficient
-        if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and not production'; else marks='not production'; fi
-        echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        if [ "${{ matrix.code-cov }}" = "true" ]; then
+          codecov="--cov=openml --long --cov-report=xml"
+        fi
+
+        if [ "${{ matrix.sklearn-only }}" = "true" ]; then
+          marks="sklearn and not production"
+        else
+          marks="not production"
+        fi
+
         pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+
     - name: Run tests on Ubuntu Production
       if: matrix.os == 'ubuntu-latest'
       run: |
-        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long  --cov-report=xml'; fi
-        # Most of the time, running only the scikit-learn tests is sufficient
-        if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and production'; else marks='production'; fi
-        echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        if [ "${{ matrix.code-cov }}" = "true" ]; then
+          codecov="--cov=openml --long --cov-report=xml"
+        fi
+
+        if [ "${{ matrix.sklearn-only }}" = "true" ]; then
+          marks="sklearn and production"
+        else
+          marks="production"
+        fi
+
         pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+
     - name: Run tests on Windows
       if: matrix.os == 'windows-latest'
       run: |  # we need a separate step because of the bash-specific if-statement in the previous one.
         pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1
+
     - name: Check for files left behind by test
       if: matrix.os != 'windows-latest' && always()
       run: |
@@ -102,6 +125,7 @@ jobs:
             echo "Not all generated files have been deleted!"
             exit 1
         fi
+
     - name: Upload coverage
       if: matrix.code-cov && always()
       uses: codecov/codecov-action@v4
diff --git a/pyproject.toml b/pyproject.toml
index 2bf762b09..ede204ca0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,12 +50,11 @@ classifiers = [
   "Operating System :: Unix",
   "Operating System :: MacOS",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.8",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
+  "Programming Language :: Python :: 3.14",
 ]
 license = { file = "LICENSE" }
 
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 18d4f836f..e4cec56ab 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -625,6 +625,7 @@ def _run_and_upload_regression(
             sentinel=sentinel,
         )
 
+    @pytest.mark.skip(reason="failures_issue_1544")
     @pytest.mark.sklearn()
     def test_run_and_upload_logistic_regression(self):
         lr = LogisticRegression(solver="lbfgs", max_iter=1000)
@@ -633,6 +634,7 @@ def test_run_and_upload_logistic_regression(self):
         n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"]
         self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501")
 
+    @pytest.mark.skip(reason="failures_issue_1544")
     @pytest.mark.sklearn()
     def test_run_and_upload_linear_regression(self):
         lr = LinearRegression()
@@ -663,6 +665,7 @@ def test_run_and_upload_linear_regression(self):
         n_test_obs = self.TEST_SERVER_TASK_REGRESSION["n_test_obs"]
         self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501")
 
+    @pytest.mark.skip(reason="failures_issue_1544")
     @pytest.mark.sklearn()
     def test_run_and_upload_pipeline_dummy_pipeline(self):
         pipeline1 = Pipeline(
@@ -676,6 +679,7 @@ def test_run_and_upload_pipeline_dummy_pipeline(self):
         n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"]
         self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, n_test_obs, "62501")
 
+    @pytest.mark.skip(reason="failures_issue_1544")
     @pytest.mark.sklearn()
     @unittest.skipIf(
         Version(sklearn.__version__) < Version("0.20"),
@@ -740,6 +744,7 @@ def get_ct_cf(nominal_indices, numeric_indices):
             sentinel=sentinel,
         )
 
+    @pytest.mark.skip(reason="failures_issue_1544")
     @pytest.mark.sklearn()
     @unittest.skip("https://github.com/openml/OpenML/issues/1180")
     @unittest.skipIf(
@@ -792,6 +797,7 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock):
                 call_count += 1
         assert call_count == 3
 
+    @pytest.mark.skip(reason="failures_issue_1544")
     @pytest.mark.sklearn()
     def test_run_and_upload_gridsearch(self):
         estimator_name = (
@@ -847,6 +853,7 @@ def test_run_and_upload_randomsearch(self):
         trace = openml.runs.get_run_trace(run.run_id)
         assert len(trace.trace_iterations) == 5
 
+    @pytest.mark.skip(reason="failures_issue_1544")
     @pytest.mark.sklearn()
     def test_run_and_upload_maskedarrays(self):
         # This testcase is important for 2 reasons:
diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py
index 885f80a27..4a3dede4e 100644
--- a/tests/test_tasks/test_learning_curve_task.py
+++ b/tests/test_tasks/test_learning_curve_task.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import pandas as pd
+import pytest
 
 from openml.tasks import TaskType, get_task
 
diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py
index 14ed59470..3e324c4f8 100644
--- a/tests/test_tasks/test_regression_task.py
+++ b/tests/test_tasks/test_regression_task.py
@@ -4,6 +4,7 @@
 import ast
 
 import pandas as pd
+import pytest
 
 import openml
 from openml.exceptions import OpenMLServerException
diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py
index 9c90b7e03..e5a17a72b 100644
--- a/tests/test_tasks/test_supervised_task.py
+++ b/tests/test_tasks/test_supervised_task.py
@@ -6,6 +6,7 @@
 import pandas as pd
 
 from openml.tasks import get_task
+import pytest
 
 from .test_task import OpenMLTaskTest
 
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
index 5f1d577c0..0aa2dcc9b 100644
--- a/tests/test_tasks/test_task_functions.py
+++ b/tests/test_tasks/test_task_functions.py
@@ -174,6 +174,7 @@ def test_get_task_lazy(self):
         )
 
     @mock.patch("openml.tasks.functions.get_dataset")
+    @pytest.mark.xfail(reason="failures_issue_1544")
     def test_removal_upon_download_failure(self, get_dataset):
         class WeirdException(Exception):
             pass
diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py
index 4480c2cbc..540c43de0 100644
--- a/tests/test_tasks/test_task_methods.py
+++ b/tests/test_tasks/test_task_methods.py
@@ -5,6 +5,7 @@
 
 import openml
 from openml.testing import TestBase
+import pytest
 
 
 # Common methods between tasks