diff --git a/configure.ac b/configure.ac
index 9664a6758d..44f7ae1f51 100644
--- a/configure.ac
+++ b/configure.ac
@@ -867,6 +867,12 @@ AC_CONFIG_FILES(tests/testu01/Makefile)
 AC_CONFIG_FILES(tests/sp2n/Makefile)
 AC_CONFIG_FILES(benchmarks/Makefile)
 AC_CONFIG_FILES(examples/Makefile)
+dnl Link the regression test driver, its pytest conftest and the expected-values files into the build tree.
+AC_CONFIG_LINKS(tests/run_regression_test.py:tests/run_regression_test.py)
+AC_CONFIG_LINKS(tests/conftest.py:tests/conftest.py)
+AC_CONFIG_LINKS(tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge_expected.txt:tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge_expected.txt)
+AC_CONFIG_LINKS(tests/hmc/Test_hmc_IwasakiGauge_expected.txt:tests/hmc/Test_hmc_IwasakiGauge_expected.txt)
+AC_CONFIG_LINKS(tests/hmc/Test_hmc_WilsonFermionGauge_expected.txt:tests/hmc/Test_hmc_WilsonFermionGauge_expected.txt)
 AC_OUTPUT
 
 echo ""
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000000..4f232d9c0d
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,43 @@
+import pytest
+
+def pytest_addoption(parser):
+    """
+    Register the command-line options read by the regression tests.
+
+    Each option takes a single value and is registered with its help text.
+    """
+    for flag, description in (
+        ("--test_name", "File name of the test"),
+        ("--grid", "Grid configuration"),
+        ("--mpi", "MPI configuration"),
+        ("--expected_line", "Line number to read off the expected values file"),
+    ):
+        parser.addoption(flag, action="store", help=description)
+
+@pytest.fixture
+def test_name(request):  # fixture: value given for the --test_name command-line option
+    return request.config.getoption("--test_name")
+
+@pytest.fixture
+def grid(request):  # fixture: value given for the --grid command-line option
+    return request.config.getoption("--grid")
+
+@pytest.fixture
+def mpi(request):  # fixture: value given for the --mpi command-line option
+    return request.config.getoption("--mpi")
+
+@pytest.fixture
+def expected_line(request):  # fixture: value given for the --expected_line command-line option
+    return request.config.getoption("--expected_line")
+
+@pytest.fixture
+def cleanup_files():
+    """Return a callable that deletes the files left behind by a test run."""
+    import os
+    def _cleanup(failed=True):
+        # Checkpoint files are always removed; output.txt is kept when the test failed.
+        leftovers = ([] if failed else ["output.txt"]) + ["ckpoint_rng.1", "ckpoint_lat.1"]
+        for path in leftovers:
+            if os.path.exists(path):
+                os.remove(path)
+    return _cleanup
\ No newline at end of file
diff --git a/tests/hmc/Test_hmc_IwasakiGauge.cc b/tests/hmc/Test_hmc_IwasakiGauge.cc
index 66345bc03e..eaf68b639f 100644
--- a/tests/hmc/Test_hmc_IwasakiGauge.cc
+++ b/tests/hmc/Test_hmc_IwasakiGauge.cc
@@ -51,7 +51,7 @@ int main(int argc, char **argv) {
   CheckpointerParameters CPparams;  
   CPparams.config_prefix = "ckpoint_lat";
   CPparams.rng_prefix = "ckpoint_rng";
-  CPparams.saveInterval = 20;
+  CPparams.saveInterval = 1;
   CPparams.format = "IEEE64BIG";
   
   TheHMC.Resources.LoadNerscCheckpointer(CPparams);
diff --git a/tests/hmc/Test_hmc_IwasakiGauge_expected.txt b/tests/hmc/Test_hmc_IwasakiGauge_expected.txt
new file mode 100644
index 0000000000..6726a0ed3b
--- /dev/null
+++ b/tests/hmc/Test_hmc_IwasakiGauge_expected.txt
@@ -0,0 +1,3 @@
+1 8.8.8.8 1.1.1.1 4 20 1.0 GPU 0.269298793 633bf471 3a22ad20 -- Mashy's (probably local)
+2 8.8.8.8 1.1.1.1 1 20 1.0 CPU 0.269298793 633bf471 3a116d60
+3 8.8.8.8 1.1.1.1 4 20 1.0 CPU 0.269298793 633bf471 3a116d60
diff --git a/tests/hmc/Test_hmc_WilsonFermionGauge.cc b/tests/hmc/Test_hmc_WilsonFermionGauge.cc
index a0c43c515b..c89d2a87da 100644
--- a/tests/hmc/Test_hmc_WilsonFermionGauge.cc
+++ b/tests/hmc/Test_hmc_WilsonFermionGauge.cc
@@ -59,7 +59,7 @@ int main(int argc, char **argv) {
   CheckpointerParameters CPparams;  
   CPparams.config_prefix = "ckpoint_lat";
   CPparams.rng_prefix = "ckpoint_rng";
-  CPparams.saveInterval = 5;
+  CPparams.saveInterval = 1;
   CPparams.format = "IEEE64BIG";
   
   TheHMC.Resources.LoadNerscCheckpointer(CPparams);
diff --git a/tests/hmc/Test_hmc_WilsonFermionGauge_expected.txt b/tests/hmc/Test_hmc_WilsonFermionGauge_expected.txt
new file mode 100644
index 0000000000..93f50e1d07
--- /dev/null
+++ b/tests/hmc/Test_hmc_WilsonFermionGauge_expected.txt
@@ -0,0 +1,3 @@
+1 8.8.8.8 1.1.1.1 1 20 1.0 CPU 0.254950222 7f25d41 9d4e34e3
+2 8.8.8.8 1.1.1.1 4 20 1.0 CPU 0.254950222 7f25d41 9d63f203
+3 8.8.8.8 1.1.1.1 4 20 1.0 GPU 0.254950222 7f25d41 9d40279f -- Mashy's (probably local)
diff --git a/tests/run_regression_test.py b/tests/run_regression_test.py
new file mode 100644
index 0000000000..81bfa400d2
--- /dev/null
+++ b/tests/run_regression_test.py
@@ -0,0 +1,140 @@
+"""
+Steering script to run end-to-end regression tests.
+
+This script:
+1. Reads the parameters to run the test with and the expected values 
+   of certain outputs to compare against, from a file.
+2. Runs the requested test with the parameters read. 
+   The executable must exist in the local directory, it is not recompiled.
+3. Picks the output values from the output of the test.
+4. Compares output values with expected.
+5. After the comparison, deletes the checkpoint files; output.txt is deleted only if the test was successful.
+
+Dependencies: pytest, pytest-check
+
+The script should be run from the desired build/tests/<test_dir>,
+which should contain
+1. the test executable <test_name> and
+2. a file containing combinations of test parameters and the output values 
+   they are expected to produce, named <test_name>_expected.txt
+When adding new tests, please add the new <test_name>_expected.txt file
+to the repo and amend configure.ac to link it to the build location
+with AC_CONFIG_LINKS.
+
+To run and produce nice reporting:
+pytest ../run_regression_test.py -rP --test_name=<test_name> --expected_line=<line_number>
+where 
+<test_name> is the name of the test executable
+<line_number> is the number of the line in the <test_name>_expected.txt file to read the
+test parameters and expected output values from
+
+For example, from build/tests/sp2n:
+pytest ../run_regression_test.py -rP --test_name=Test_hmc_Sp_WilsonFundFermionGauge --expected_line=2
+"""
+
+import pytest
+import pytest_check as check
+
+def read_expected_values_line(test_name, line_number=1):
+    """
+    Read test parameters and expected values from file.
+
+    The file <test_name>_expected.txt contains one or more entries of the format:
+    <line_number> <grid> <mpi> <nthreads> <MDsteps> <trajL> <CPU|GPU> <plaquette> <checksum_rng> <checksum_lat>
+    Eg.
+    1 8.8.8.8 1.1.1.1 1 2 0.1 CPU 0.0256253844 922c392f d1e4f305
+    Anything after the tenth field is ignored, so an entry may end with a note.
+
+    line_number may be an int or a string; it is compared as text with the
+    first field of each entry, and the first matching entry wins.
+
+    Returns (test_parameters, plaquette, checksum_rng, checksum_lat), or
+    ({}, None, None, None) when no entry carries the requested line_number.
+    """
+
+    parameter_names = ('grid', 'mpi', 'nthreads', 'MDsteps', 'trajL', 'CPU|GPU')
+    wanted = str(line_number)  # fields read from the file are strings, so compare as text
+    with open(f"{test_name}_expected.txt") as file:
+        for line in file:
+            line_split = line.split()
+            if line_split and line_split[0] == wanted:
+                test_parameters = dict(zip(parameter_names, line_split[1:7]))
+                print("Reading reference values for ", test_parameters)
+                return test_parameters, float(line_split[7]), line_split[8], line_split[9]
+
+    return {}, None, None, None
+
+
+def read_output(test_parameters):
+    """
+    Scan output.txt for the run parameters and the values under test.
+
+    Fails the test when MDsteps, trajL or CPU|GPU found in the output disagree
+    with test_parameters, or when MDsteps or trajL are missing from the output.
+    Returns (plaquette, checksum_rng, checksum_lat); entries never seen are None.
+    """
+
+    steps_seen = length_seen = None
+    plaquette = checksum_rng = checksum_lat = None
+    device = 'CPU'  # flips to GPU as soon as any line mentions cuda
+
+    with open("output.txt", 'r') as file:
+        for line in file:
+            if device == 'CPU' and "cuda" in line:
+                device = 'GPU'
+            # Parameters the executable was actually run with
+            if "Number of MD steps" in line:
+                steps_seen = int(line.split(' : ')[4].strip())
+                if steps_seen != int(test_parameters['MDsteps']):
+                    pytest.fail(f"Test was run with MDsteps={steps_seen} instead of {test_parameters['MDsteps']}. You need to modify the test source code and recompile.")
+                continue
+            if "Trajectory length" in line:
+                length_seen = float(line.split(' : ')[4].strip())
+                if length_seen != float(test_parameters['trajL']):
+                    pytest.fail(f"Test was run with trajL={length_seen} instead of {test_parameters['trajL']}. You need to modify the test source code and recompile.")
+                continue
+            if "Written NERSC" not in line:
+                continue
+            # Checkpoint lines carry the values under test after 'checksum '
+            fields = line.split('checksum ')[1].split()
+            if len(fields) == 1:  # this is the rng checksum line
+                checksum_rng = fields[0]
+            elif len(fields) == 3:  # this is the lat checksum and plaquette value line
+                checksum_lat, plaquette = fields[0], float(fields[2])
+            else:
+                print("Picked wrong line...")
+
+    if device != test_parameters['CPU|GPU']:
+        pytest.fail(f"Test was run with {device} instead of {test_parameters['CPU|GPU']}")
+    if steps_seen is None or length_seen is None:
+        pytest.fail("Could not verify test parameters MDsteps and/or trajL against test output.")
+
+    return plaquette, checksum_rng, checksum_lat
+
+
+def test_outputs(test_name, expected_line, cleanup_files):
+    """Run ./<test_name> with the reference parameters and check plaquette and checksums against the expected values."""
+    import subprocess
+
+    test_parameters, expected_plaquette, expected_checksum_rng, expected_checksum_lat = read_expected_values_line(test_name, expected_line)
+    if (not test_parameters) or (expected_plaquette is None) or (expected_checksum_rng is None) or (expected_checksum_lat is None):
+        pytest.fail(f"No appropriate reference values found, check {test_name}_expected.txt")
+    print(f"Running {test_name} for test parameters: ", test_parameters)
+    if test_parameters['nthreads'] == '0':
+        print("Running with 1 thread: --threads 1")
+        test_parameters['nthreads'] = '1'
+    # Pass an argument list and redirect stdout ourselves: no shell re-parses the values read from file.
+    command = [f"./{test_name}", "--grid", test_parameters['grid'], "--mpi", test_parameters['mpi'], "--Thermalizations", "0", "--Trajectories", "1", "--threads", test_parameters['nthreads']]
+    with open("output.txt", "w") as output_file:
+        result = subprocess.run(command, stdout=output_file)
+    if result.returncode != 0:
+        print(f"{test_name} exited with status {result.returncode}")  # diagnostic only; the checks below decide
+    plaquette, checksum_rng, checksum_lat = read_output(test_parameters)
+    if (checksum_rng is None) or (checksum_lat is None) or (plaquette is None):
+        pytest.fail("Error reading values from output file. Make sure you compile the test with CPparams.saveInterval=1 in order to produce the required output.")
+    # Each check is evaluated and recorded by hand; pytest-check 1.2.0 and later could use any_failures() instead.
+    failed = not check.equal(plaquette, expected_plaquette, msg="Plaquette value comparison failed")
+    failed = (not check.equal(checksum_lat, expected_checksum_lat, msg="LAT file checksum comparison failed")) or failed
+    failed = (not check.equal(checksum_rng, expected_checksum_rng, msg="RND file checksum comparison failed")) or failed
+    cleanup_files(failed)
+
diff --git a/tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge.cc b/tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge.cc
index e655000f36..357c606540 100644
--- a/tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge.cc
+++ b/tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge.cc
@@ -22,7 +22,7 @@ int main(int argc, char **argv) {
   CheckpointerParameters CPparams;
   CPparams.config_prefix = "ckpoint_lat";
   CPparams.rng_prefix = "ckpoint_rng";
-  CPparams.saveInterval = 100;
+  CPparams.saveInterval = 1;
   CPparams.format = "IEEE64BIG";
     
   TheHMC.Resources.LoadNerscCheckpointer(CPparams);
diff --git a/tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge_expected.txt b/tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge_expected.txt
new file mode 100644
index 0000000000..8d7e00a27e
--- /dev/null
+++ b/tests/sp2n/Test_hmc_Sp_WilsonFundFermionGauge_expected.txt
@@ -0,0 +1,4 @@
+1 8.8.8.8 1.1.1.1 4 36 1.0 CPU 0.281507042 922c392f 7047fcf6
+2 8.8.8.8 1.1.1.1 1 36 1.0 CPU 0.281507042 922c392f 6a8bb5f6
+3 8.8.8.8 1.1.1.1 4 2 0.1 CPU 0.0256253844 922c392f d1e371ab
+4 8.8.8.8 1.1.1.1 1 2 0.1 CPU 0.0256253844 922c392f d1e4f305
\ No newline at end of file