diff --git a/.build_petsc_for_ci.sh b/.build_petsc_for_ci.sh
new file mode 100755
index 0000000000..f9516b882a
--- /dev/null
+++ b/.build_petsc_for_ci.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+
+if test $BUILD_PETSC ; then
+    if [[ ! -d $HOME/local/petsc/include/petsc ]]; then
+	echo "****************************************"
+	echo "Building PETSc"
+	echo "****************************************"
+
+	git clone -b release https://gitlab.com/petsc/petsc.git petsc --depth=1
+
+	unset PETSC_DIR
+	unset PETSC_ARCH
+
+	pushd petsc
+	./configure \
+	    --with-mpi=yes \
+	    --with-precision=double \
+	    --with-scalar-type=real \
+	    --with-shared-libraries=1 \
+	    --with-debugging=0 \
+	    {C,CXX,F}OPTFLAGS="-O3 -march=native" \
+	    --prefix=$HOME/local/petsc
+
+	make && make install
+	popd
+
+	echo "****************************************"
+	echo " Finished building PETSc"
+	echo "****************************************"
+
+	echo "****************************************"
+	echo "Building SLEPc"
+	echo "****************************************"
+
+    git clone -b release https://gitlab.com/slepc/slepc.git slepc --depth=1
+
+    pushd slepc
+	unset SLEPC_DIR
+	unset SLEPC_ARCH
+    PETSC_DIR=$HOME/local/petsc ./configure --prefix=$HOME/local/slepc
+
+    make SLEPC_DIR=$(pwd) PETSC_DIR=$HOME/local/petsc
+    make SLEPC_DIR=$(pwd) PETSC_DIR=$HOME/local/petsc install
+    popd
+
+	echo "****************************************"
+	echo " Finished building SLEPc"
+	echo "****************************************"
+    else
+	echo "****************************************"
+	echo " PETSc already installed"
+	echo "****************************************"
+    fi
+else
+    echo "****************************************"
+    echo " PETSc not requested"
+    echo "****************************************"
+fi
diff --git a/.ci_fedora.sh b/.ci_fedora.sh
index 5106480dc5..1a3d2d92b2 100755
--- a/.ci_fedora.sh
+++ b/.ci_fedora.sh
@@ -37,7 +37,7 @@ then
     cat /etc/os-release
     # Ignore weak depencies
     echo "install_weak_deps=False" >> /etc/dnf/dnf.conf
-    time dnf -y install dnf-plugins-core python3-pip cmake
+    time dnf -y install dnf5-plugins python3-pip cmake
     # Allow to override packages - see #2073
     time dnf copr enable -y davidsch/fixes4bout || :
     time dnf -y upgrade
@@ -49,7 +49,7 @@ then
     sudo -u test ${0/\/tmp/\/home\/test} $mpi
 ## If we are called as normal user, run test
 else
-    pip install --user zoidberg
+    pip install --user zoidberg natsort
     . /etc/profile.d/modules.sh
     module load mpi/${1}-x86_64
     export OMPI_MCA_rmaps_base_oversubscribe=yes
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8864c9d113..19f3ec56ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,13 @@ if(${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.22)
   cmake_policy(SET CMP0127 NEW)
 endif()
 
+if ("${CMAKE_CURRENT_BINARY_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
+  option(BOUT_ALLOW_INSOURCE_BUILD "Whether BOUT++ should really allow to build in source." OFF)
+  if (NOT ${BOUT_ALLOW_INSOURCE_BUILD})
+    message(FATAL_ERROR "BOUT++ does not recommend in source builds. Try building out of source, e.g. with `cmake -S . -B build` or set -DBOUT_ALLOW_INSOURCE_BUILD=ON - but things may break!")
+  endif()
+endif()
+
 # CMake currently doesn't support proper semver
 # Set the version here, strip any extra tags to use in `project`
 # We try to use git to get a full description, inspired by setuptools_scm
diff --git a/cmake/FindPETSc.cmake b/cmake/FindPETSc.cmake
index 1ddf95d2d0..c93d464673 100644
--- a/cmake/FindPETSc.cmake
+++ b/cmake/FindPETSc.cmake
@@ -264,7 +264,7 @@ show :
 
   include(Check${PETSC_LANGUAGE_BINDINGS}SourceRuns)
 
-  macro (PETSC_TEST_RUNS includes libraries runs)
+  macro (petsc_test_compiles includes libraries runs)
     message(STATUS "PETSc test with : ${includes} ${libraries}" )
     if (PETSC_VERSION VERSION_GREATER 3.1)
       set (_PETSC_TSDestroy "TSDestroy(&ts)")
@@ -287,12 +287,12 @@ int main(int argc,char *argv[]) {
   return 0;
 }
 ")
-    multipass_source_runs ("${includes}" "${libraries}" "${_PETSC_TEST_SOURCE}" ${runs} "${PETSC_LANGUAGE_BINDINGS}")
+    multipass_source_compiles ("${includes}" "${libraries}" "${_PETSC_TEST_SOURCE}" ${runs} "${PETSC_LANGUAGE_BINDINGS}")
     if (${${runs}})
-      set (PETSC_EXECUTABLE_RUNS "YES" CACHE BOOL
+      set (PETSC_EXECUTABLE_COMPILES "YES" CACHE BOOL
         "Can the system successfully run a PETSc executable?  This variable can be manually set to \"YES\" to force CMake to accept a given PETSc configuration, but this will almost always result in a broken build.  If you change PETSC_DIR, PETSC_ARCH, or PETSC_CURRENT you would have to reset this variable." FORCE)
     endif (${${runs}})
-  endmacro (PETSC_TEST_RUNS)
+  endmacro ()
 
 
   find_path (PETSC_INCLUDE_DIR petscts.h
@@ -314,14 +314,14 @@ int main(int argc,char *argv[]) {
   set (petsc_mpi_include_dirs "${MPI_${PETSC_LANGUAGE_BINDINGS}_INCLUDE_DIRS}")
   #set (petsc_additional_libraries "MPI::MPI_${PETSC_LANGUAGE_BINDINGS}${petsc_openmp_library}")
 
-  petsc_test_runs ("${petsc_includes_minimal};${petsc_mpi_include_dirs}"
+  petsc_test_compiles ("${petsc_includes_minimal};${petsc_mpi_include_dirs}"
     "${PETSC_LIBRARIES_TS};${petsc_additional_libraries}"
     petsc_works_minimal)
   if (petsc_works_minimal)
     message (STATUS "Minimal PETSc includes and libraries work.  This probably means we are building with shared libs.")
     set (petsc_includes_needed "${petsc_includes_minimal}")
   else (petsc_works_minimal)     # Minimal includes fail, see if just adding full includes fixes it
-    petsc_test_runs ("${petsc_includes_all};${petsc_mpi_include_dirs}"
+    petsc_test_compiles ("${petsc_includes_all};${petsc_mpi_include_dirs}"
       "${PETSC_LIBRARIES_TS};${petsc_additional_libraries}"
       petsc_works_allincludes)
     if (petsc_works_allincludes) # It does, we just need all the includes (
@@ -332,7 +332,7 @@ int main(int argc,char *argv[]) {
       foreach (pkg SYS VEC MAT DM KSP SNES TS ALL)
         list (APPEND PETSC_LIBRARIES_${pkg}  ${petsc_libraries_external})
       endforeach (pkg)
-      petsc_test_runs ("${petsc_includes_minimal};${petsc_mpi_include_dirs}"
+      petsc_test_compiles ("${petsc_includes_minimal};${petsc_mpi_include_dirs}"
         "${PETSC_LIBRARIES_TS};${petsc_additional_libraries}"
         petsc_works_alllibraries)
       if (petsc_works_alllibraries)
@@ -341,7 +341,7 @@ int main(int argc,char *argv[]) {
       else (petsc_works_alllibraries)
         # It looks like we really need everything, should have listened to Matt
         set (petsc_includes_needed ${petsc_includes_all})
-        petsc_test_runs ("${petsc_includes_all};${petsc_mpi_include_dirs}"
+        petsc_test_compiles ("${petsc_includes_all};${petsc_mpi_include_dirs}"
           "${PETSC_LIBRARIES_TS};${petsc_additional_libraries}"
           petsc_works_all)
         if (petsc_works_all) # We fail anyways
@@ -372,7 +372,7 @@ if (NOT PETSC_INCLUDES AND NOT TARGET PETSc::PETSc)
     pkg_search_module(PkgPETSC PETSc>3.4.0 petsc>3.4.0)
     set (PETSC_LIBRARIES ${PkgPETSC_LINK_LIBRARIES} CACHE STRING "PETSc libraries" FORCE)
     set (PETSC_INCLUDES ${PkgPETSC_INCLUDE_DIRS} CACHE STRING "PETSc include path" FORCE)
-    set (PETSC_EXECUTABLE_RUNS "YES" CACHE BOOL
+    set (PETSC_EXECUTABLE_COMPILES "YES" CACHE BOOL
         "Can the system successfully run a PETSc executable?  This variable can be manually set to \"YES\" to force CMake to accept a given PETSc configuration, but this will almost always result in a broken build.  If you change PETSC_DIR, PETSC_ARCH, or PETSC_CURRENT you would have to reset this variable." FORCE)
   endif()
 endif()
@@ -380,7 +380,7 @@ endif()
 # Note that we have forced values for all these choices.  If you
 # change these, you are telling the system to trust you that they
 # work.  It is likely that you will end up with a broken build.
-mark_as_advanced (PETSC_INCLUDES PETSC_LIBRARIES PETSC_COMPILER PETSC_DEFINITIONS PETSC_MPIEXEC PETSC_EXECUTABLE_RUNS)
+mark_as_advanced (PETSC_INCLUDES PETSC_LIBRARIES PETSC_COMPILER PETSC_DEFINITIONS PETSC_MPIEXEC PETSC_EXECUTABLE_COMPILES)
 
 include (FindPackageHandleStandardArgs)
 find_package_handle_standard_args (PETSc
diff --git a/cmake/FindPackageMultipass.cmake b/cmake/FindPackageMultipass.cmake
index fbf06a7f0f..2452096b56 100644
--- a/cmake/FindPackageMultipass.cmake
+++ b/cmake/FindPackageMultipass.cmake
@@ -32,6 +32,8 @@
 #  Always runs the given test, use this when you need to re-run tests
 #  because parent variables have made old cache entries stale.
 
+include(CheckCXXSourceCompiles)
+
 macro (FIND_PACKAGE_MULTIPASS _name _current)
   string (TOUPPER ${_name} _NAME)
   set (_args ${ARGV})
@@ -104,3 +106,27 @@ endmacro (MULTIPASS_SOURCE_RUNS)
 macro (MULTIPASS_C_SOURCE_RUNS includes libraries source runs)
   multipass_source_runs("${includes}" "${libraries}" "${source}" ${runs} "C")
 endmacro (MULTIPASS_C_SOURCE_RUNS)
+
+macro (MULTIPASS_SOURCE_COMPILES includes libraries source runs language)
+  include (Check${language}SourceRuns)
+  # This is a ridiculous hack.  CHECK_${language}_SOURCE_* thinks that if the
+  # *name* of the return variable doesn't change, then the test does
+  # not need to be re-run.  We keep an internal count which we
+  # increment to guarantee that every test name is unique.  If we've
+  # gotten here, then the configuration has changed enough that the
+  # test *needs* to be rerun.
+  if (NOT MULTIPASS_TEST_COUNT)
+    set (MULTIPASS_TEST_COUNT 00)
+  endif (NOT MULTIPASS_TEST_COUNT)
+  math (EXPR _tmp "${MULTIPASS_TEST_COUNT} + 1") # Why can't I add to a cache variable?
+  set (MULTIPASS_TEST_COUNT ${_tmp} CACHE INTERNAL "Unique test ID")
+  set (testname MULTIPASS_TEST_${MULTIPASS_TEST_COUNT}_${runs})
+  set (CMAKE_REQUIRED_INCLUDES ${includes})
+  set (CMAKE_REQUIRED_LIBRARIES ${libraries})
+  if(${language} STREQUAL "C")
+    check_c_source_compiles ("${source}" ${testname})
+  elseif(${language} STREQUAL "CXX")
+    check_cxx_source_compiles ("${source}" ${testname})
+  endif()
+  set (${runs} "${${testname}}")
+endmacro ()
diff --git a/cmake/FindSLEPc.cmake b/cmake/FindSLEPc.cmake
index 2db1a25a1f..3add8eba8b 100644
--- a/cmake/FindSLEPc.cmake
+++ b/cmake/FindSLEPc.cmake
@@ -137,10 +137,9 @@ show :
 endif()
 
 if (SLEPC_SKIP_BUILD_TESTS)
-  set(SLEPC_TEST_RUNS TRUE)
   set(SLEPC_VERSION "UNKNOWN")
   set(SLEPC_VERSION_OK TRUE)
-elseif (SLEPC_LIBRARIES AND SLEPC_INCLUDE_DIRS AND NOT SLEPC_TEST_RUNS)
+elseif (SLEPC_LIBRARIES AND SLEPC_INCLUDE_DIRS)
 
   # Set flags for building test program
   set(CMAKE_REQUIRED_INCLUDES ${SLEPC_INCLUDE_DIRS})
@@ -192,72 +191,6 @@ int main() {
     set(SLEPC_VERSION_OK TRUE CACHE BOOL "")
   endif()
   mark_as_advanced(SLEPC_VERSION_OK)
-
-  # Run SLEPc test program
-  set(SLEPC_TEST_LIB_CPP
-    "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/slepc_test_lib.cpp")
-  file(WRITE ${SLEPC_TEST_LIB_CPP} "
-#include \"petsc.h\"
-#include \"slepceps.h\"
-int main()
-{
-  PetscErrorCode ierr;
-  int argc = 0;
-  char** argv = NULL;
-  ierr = SlepcInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);
-  EPS eps;
-  ierr = EPSCreate(PETSC_COMM_SELF, &eps); CHKERRQ(ierr);
-  //ierr = EPSSetFromOptions(eps); CHKERRQ(ierr);
-#if PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR <= 1
-  ierr = EPSDestroy(eps); CHKERRQ(ierr);
-#else
-  ierr = EPSDestroy(&eps); CHKERRQ(ierr);
-#endif
-  ierr = SlepcFinalize(); CHKERRQ(ierr);
-  return 0;
-}
-")
-
-  try_run(
-    SLEPC_TEST_LIB_EXITCODE
-    SLEPC_TEST_LIB_COMPILED
-    ${CMAKE_CURRENT_BINARY_DIR}
-    ${SLEPC_TEST_LIB_CPP}
-    CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}"
-    LINK_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}
-    COMPILE_OUTPUT_VARIABLE SLEPC_TEST_LIB_COMPILE_OUTPUT
-    RUN_OUTPUT_VARIABLE SLEPC_TEST_LIB_OUTPUT
-    )
-
-  if (SLEPC_TEST_LIB_COMPILED AND SLEPC_TEST_LIB_EXITCODE EQUAL 0)
-    message(STATUS "Performing test SLEPC_TEST_RUNS - Success")
-    set(SLEPC_TEST_RUNS TRUE CACHE BOOL "SLEPc test program can run")
-  else()
-    message(STATUS "Performing test SLEPC_TEST_RUNS - Failed")
-
-    # Test program does not run - try adding SLEPc 3rd party libs and test again
-    list(APPEND CMAKE_REQUIRED_LIBRARIES ${SLEPC_EXTERNAL_LIBRARIES})
-
-    try_run(
-      SLEPC_TEST_3RD_PARTY_LIBS_EXITCODE
-      SLEPC_TEST_3RD_PARTY_LIBS_COMPILED
-      ${CMAKE_CURRENT_BINARY_DIR}
-      ${SLEPC_TEST_LIB_CPP}
-      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}"
-      LINK_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}
-      COMPILE_OUTPUT_VARIABLE SLEPC_TEST_3RD_PARTY_LIBS_COMPILE_OUTPUT
-      RUN_OUTPUT_VARIABLE SLEPC_TEST_3RD_PARTY_LIBS_OUTPUT
-      )
-
-    if (SLEPC_TEST_3RD_PARTY_LIBS_COMPILED AND SLEPC_TEST_3RD_PARTY_LIBS_EXITCODE EQUAL 0)
-      message(STATUS "Performing test SLEPC_TEST_3RD_PARTY_LIBS_RUNS - Success")
-      set(SLEPC_LIBRARIES ${SLEPC_LIBRARIES} ${SLEPC_EXTERNAL_LIBRARIES}
-        CACHE STRING "SLEPc libraries." FORCE)
-      set(SLEPC_TEST_RUNS TRUE CACHE BOOL "SLEPc test program can run")
-    else()
-      message(STATUS "Performing test SLEPC_TEST_3RD_PARTY_LIBS_RUNS - Failed")
-    endif()
-  endif()
 endif()
 
 # Standard package handling
@@ -266,8 +199,7 @@ find_package_handle_standard_args(SLEPc
   FOUND_VAR SLEPC_FOUND
   FAIL_MESSAGE "SLEPc could not be found. Be sure to set SLEPC_DIR, PETSC_DIR, and PETSC_ARCH."
   VERSION_VAR SLEPC_VERSION
-  REQUIRED_VARS SLEPC_LIBRARIES SLEPC_DIR SLEPC_INCLUDE_DIRS SLEPC_TEST_RUNS
-                SLEPC_VERSION_OK)
+  REQUIRED_VARS SLEPC_LIBRARIES SLEPC_DIR SLEPC_INCLUDE_DIRS SLEPC_VERSION_OK)
 
 if (SLEPC_FOUND)
   if (NOT TARGET SLEPc::SLEPc)
diff --git a/examples/laplacexy/simple-hypre/test-laplacexy-hypre.cxx b/examples/laplacexy/simple-hypre/test-laplacexy-hypre.cxx
index 154870ec9e..af2367e1a0 100644
--- a/examples/laplacexy/simple-hypre/test-laplacexy-hypre.cxx
+++ b/examples/laplacexy/simple-hypre/test-laplacexy-hypre.cxx
@@ -13,12 +13,14 @@ int main(int argc, char** argv) {
                                                 bout::globals::mesh);
 
     /// Solution
-    Field2D x = 0.0;
+    Field2D solution = 0.0;
 
-    x = laplacexy.solve(rhs, x);
+    solution = laplacexy.solve(rhs, solution);
 
-    SAVE_ONCE2(rhs, x);
-    bout::globals::dump.write(); // Save output file
+    Options dump;
+    dump["rhs"] = rhs;
+    dump["x"] = solution;
+    bout::writeDefaultOutputFile(dump);
   }
   BoutFinalise();
 #if BOUT_HAS_CUDA
diff --git a/examples/tokamak-2fluid/2fluid.cxx b/examples/tokamak-2fluid/2fluid.cxx
index 9108edb5bd..03458d2d4e 100644
--- a/examples/tokamak-2fluid/2fluid.cxx
+++ b/examples/tokamak-2fluid/2fluid.cxx
@@ -4,6 +4,7 @@
  * for cross-benchmarking etc.
  *******************************************************************************/
 
+#include "bout/bout_types.hxx"
 #include <bout/physicsmodel.hxx>
 
 #include <bout/derivs.hxx>
@@ -98,6 +99,9 @@ class TwoFluid : public PhysicsModel {
   std::unique_ptr<Laplacian> aparSolver;
   Field2D acoef; // Coefficient in the Helmholtz equation
 
+  /// Location of possibly staggered fields
+  CELL_LOC stagger_loc = CELL_LOC::deflt;
+
   int init(bool UNUSED(restarting)) override {
     TRACE("int init(bool) ");
 
@@ -154,15 +158,15 @@ class TwoFluid : public PhysicsModel {
     // READ OPTIONS
 
     // Read some parameters
-    auto globalOptions = Options::root();
-    auto options = globalOptions["2fluid"];
+    auto& globalOptions = Options::root();
+    auto& options = globalOptions["2fluid"];
 
     AA = options["AA"].withDefault(2.0);
     ZZ = options["ZZ"].withDefault(1.0);
 
     estatic = options["estatic"].withDefault(false);
     ZeroElMass = options["ZeroElMass"].withDefault(false);
-    zeff = options["zeff"].withDefault(1.0);
+    zeff = options["Zeff"].withDefault(1.0);
     nu_perp = options["nu_perp"].withDefault(0.0);
     ShearFactor = options["ShearFactor"].withDefault(1.0);
     OhmPe = options["OhmPe"].withDefault(true);
@@ -170,8 +174,7 @@ class TwoFluid : public PhysicsModel {
     curv_upwind = options["curv_upwind"].withDefault(false);
 
     // Choose method to use for Poisson bracket advection terms
-    int bracket_method;
-    bracket_method = options["bracket_method"].withDefault(0);
+    int bracket_method = options["bracket_method"].withDefault(0);
     switch (bracket_method) {
     case 0: {
       bm = BRACKET_STD;
@@ -206,79 +209,92 @@ class TwoFluid : public PhysicsModel {
     laplace_extra_rho_term = options["laplace_extra_rho_term"].withDefault(false);
     vort_include_pi = options["vort_include_pi"].withDefault(false);
 
-    lowPass_z = options["lowPass_z"].withDefault(-1);
+    lowPass_z = options["low_pass_z"].withDefault(-1);
+
+    ////////////////////////////////////////////////////////
+    // Equation terms
+
+    auto& ni_options = globalOptions["Ni"];
+    ni_options.setConditionallyUsed();
+    evolve_ni = ni_options["evolve"].withDefault(true);
+
+    auto& rho_options = globalOptions["rho"];
+    rho_options.setConditionallyUsed();
+    evolve_rho = rho_options["evolve"].withDefault(true);
+
+    auto& vi_options = globalOptions["Vi"];
+    vi_options.setConditionallyUsed();
+    evolve_vi = vi_options["evolve"].withDefault(true);
+
+    auto& te_options = globalOptions["Te"];
+    te_options.setConditionallyUsed();
+    evolve_ti = te_options["evolve"].withDefault(true);
 
-    evolve_ni = globalOptions["Ni"]["evolve"].withDefault(true);
-    evolve_rho = globalOptions["rho"]["evolve"].withDefault(true);
-    evolve_vi = globalOptions["vi"]["evolve"].withDefault(true);
-    evolve_ti = globalOptions["ti"]["evolve"].withDefault(true);
-    evolve_ajpar = globalOptions["Ajpar"]["evolve"].withDefault(true);
+    auto& ti_options = globalOptions["Ti"];
+    ti_options.setConditionallyUsed();
+    evolve_ti = ti_options["evolve"].withDefault(true);
+
+    auto& ajpar_options = globalOptions["Ajpar"];
+    ajpar_options.setConditionallyUsed();
+    evolve_ajpar = ajpar_options["evolve"].withDefault(true);
 
     if (ZeroElMass) {
-      evolve_ajpar = 0; // Don't need ajpar - calculated from ohm's law
+      evolve_ajpar = false; // Don't need ajpar - calculated from ohm's law
     }
 
-    ////////////////////////////////////////////////////////
-    // Equation terms
-
     if (evolve_ni) {
-      Options* options = &globalOptions["Ni"];
-      options->get("ni1_phi0", ni_ni1_phi0, false);
-      options->get("ni0_phi1", ni_ni0_phi1, false);
-      options->get("ni1_phi1", ni_ni1_phi1, false);
-      options->get("nit_phit", ni_nit_phit, false);
-      options->get("vi1_ni0", ni_vi1_ni0, false);
-      options->get("vi0_ni1", ni_vi0_ni1, false);
-      options->get("vi1_ni1", ni_vi1_ni1, false);
-      options->get("vit_nit", ni_vit_nit, false);
-      options->get("jpar1", ni_jpar1, false);
-      options->get("pe1", ni_pe1, false);
-      options->get("ni0_curv_phi1", ni_ni0_curv_phi1, false);
-      options->get("ni1_curv_phi0", ni_ni1_curv_phi0, false);
-      options->get("ni1_curv_phi1", ni_ni1_curv_phi1, false);
-      options->get("nit_curv_phit", ni_nit_curv_phit, false);
+      ni_ni1_phi0 = ni_options["ni1_phi0"].withDefault(false);
+      ni_ni0_phi1 = ni_options["ni0_phi1"].withDefault(false);
+      ni_ni1_phi1 = ni_options["ni1_phi1"].withDefault(false);
+      ni_nit_phit = ni_options["nit_phit"].withDefault(false);
+      ni_vi1_ni0 = ni_options["vi1_ni0"].withDefault(false);
+      ni_vi0_ni1 = ni_options["vi0_ni1"].withDefault(false);
+      ni_vi1_ni1 = ni_options["vi1_ni1"].withDefault(false);
+      ni_vit_nit = ni_options["vit_nit"].withDefault(false);
+      ni_jpar1 = ni_options["jpar1"].withDefault(false);
+      ni_pe1 = ni_options["pe1"].withDefault(false);
+      ni_ni0_curv_phi1 = ni_options["ni0_curv_phi1"].withDefault(false);
+      ni_ni1_curv_phi0 = ni_options["ni1_curv_phi0"].withDefault(false);
+      ni_ni1_curv_phi1 = ni_options["ni1_curv_phi1"].withDefault(false);
+      ni_nit_curv_phit = ni_options["nit_curv_phit"].withDefault(false);
     }
 
     if (evolve_rho) {
-      Options* options = &globalOptions["rho"];
-      options->get("rho0_phi1", rho_rho0_phi1, false);
-      options->get("rho1_phi0", rho_rho1_phi0, false);
-      options->get("rho1_phi1", rho_rho1_phi1, false);
-      options->get("vi1_rho0", rho_vi1_rho0, false);
-      options->get("vi0_rho1", rho_vi0_rho1, false);
-      options->get("vi1_rho1", rho_vi1_rho1, false);
-      options->get("pei1", rho_pei1, false);
-      options->get("jpar1", rho_jpar1, false);
-      options->get("rho1", rho_rho1, false);
+      rho_rho0_phi1 = rho_options["rho0_phi1"].withDefault(false);
+      rho_rho1_phi0 = rho_options["rho1_phi0"].withDefault(false);
+      rho_rho1_phi1 = rho_options["rho1_phi1"].withDefault(false);
+      rho_vi1_rho0 = rho_options["vi1_rho0"].withDefault(false);
+      rho_vi0_rho1 = rho_options["vi0_rho1"].withDefault(false);
+      rho_vi1_rho1 = rho_options["vi1_rho1"].withDefault(false);
+      rho_pei1 = rho_options["pei1"].withDefault(false);
+      rho_jpar1 = rho_options["jpar1"].withDefault(false);
+      rho_rho1 = rho_options["rho1"].withDefault(false);
     }
 
     if (evolve_vi) {
-      Options* options = &globalOptions["vi"];
-      options->get("vi0_phi1", vi_vi0_phi1, false);
-      options->get("vi1_phi0", vi_vi1_phi0, false);
-      options->get("vi1_phi1", vi_vi1_phi1, false);
-      options->get("vit_phit", vi_vit_phit, false);
-      options->get("vi1_vi0", vi_vi1_vi0, false);
-      options->get("vi0_vi1", vi_vi0_vi1, false);
-      options->get("vi1_vi1", vi_vi1_vi1, false);
-      options->get("vit_vit", vi_vit_vit, false);
-      options->get("pei1", vi_pei1, false);
-      options->get("peit", vi_peit, false);
-      options->get("vi1", vi_vi1, false);
+      vi_vi0_phi1 = vi_options["vi0_phi1"].withDefault(false);
+      vi_vi1_phi0 = vi_options["vi1_phi0"].withDefault(false);
+      vi_vi1_phi1 = vi_options["vi1_phi1"].withDefault(false);
+      vi_vit_phit = vi_options["vit_phit"].withDefault(false);
+      vi_vi1_vi0 = vi_options["vi1_vi0"].withDefault(false);
+      vi_vi0_vi1 = vi_options["vi0_vi1"].withDefault(false);
+      vi_vi1_vi1 = vi_options["vi1_vi1"].withDefault(false);
+      vi_vit_vit = vi_options["vit_vit"].withDefault(false);
+      vi_pei1 = vi_options["pei1"].withDefault(false);
+      vi_peit = vi_options["peit"].withDefault(false);
+      vi_vi1 = vi_options["vi1"].withDefault(false);
     }
 
     if (evolve_te) {
-      Options* options = &globalOptions["te"];
-      options->get("te1_phi0", te_te1_phi0, false);
-      options->get("te0_phi1", te_te0_phi1, false);
-      options->get("te1_phi1", te_te1_phi1, false);
+      te_te1_phi0 = te_options["te1_phi0"].withDefault(false);
+      te_te0_phi1 = te_options["te0_phi1"].withDefault(false);
+      te_te1_phi1 = te_options["te1_phi1"].withDefault(false);
     }
 
     if (evolve_ti) {
-      Options* options = &globalOptions["ti"];
-      options->get("ti1_phi0", ti_ti1_phi0, false);
-      options->get("ti0_phi1", ti_ti0_phi1, false);
-      options->get("ti1_phi1", ti_ti1_phi1, false);
+      ti_ti1_phi0 = ti_options["ti1_phi0"].withDefault(false);
+      ti_ti0_phi1 = ti_options["ti0_phi1"].withDefault(false);
+      ti_ti1_phi1 = ti_options["ti1_phi1"].withDefault(false);
     }
 
     ////////////////////////////////////////////////////////
@@ -337,13 +353,15 @@ class TwoFluid : public PhysicsModel {
       ////////////////////////////////////////////////////////
       // SHIFTED GRIDS LOCATION
 
+      stagger_loc = CELL_LOC::ylow;
+
       // Velocities defined on cell boundaries
-      Vi.setLocation(CELL_YLOW);
-      Ajpar.setLocation(CELL_YLOW);
+      Vi.setLocation(stagger_loc);
+      Ajpar.setLocation(stagger_loc);
 
       // Apar and jpar too
-      Apar.setLocation(CELL_YLOW);
-      jpar.setLocation(CELL_YLOW);
+      Apar.setLocation(stagger_loc);
+      jpar.setLocation(stagger_loc);
     }
 
     ////////////////////////////////////////////////////////
@@ -490,16 +508,17 @@ class TwoFluid : public PhysicsModel {
     dump.addOnce(wci, "wci");
 
     // Create a solver for the Laplacian
-    phiSolver = Laplacian::create(&options["phiSolver"]);
+    phiSolver = Laplacian::create(&globalOptions["phiSolver"]);
     if (laplace_extra_rho_term) {
       // Include the first order term Grad_perp Ni dot Grad_perp phi
       phiSolver->setCoefC(Ni0);
     }
 
+    globalOptions["aparSolver"].setConditionallyUsed();
+
     if (!(estatic || ZeroElMass)) {
       // Create a solver for the electromagnetic potential
-      aparSolver = Laplacian::create(&options["aparSolver"],
-                                     mesh->StaggerGrids ? CELL_YLOW : CELL_CENTRE);
+      aparSolver = Laplacian::create(&globalOptions["aparSolver"], stagger_loc);
       if (mesh->StaggerGrids) {
         acoef = (-0.5 * beta_p / fmei) * interp_to(Ni0, CELL_YLOW);
       } else {
@@ -599,10 +618,13 @@ class TwoFluid : public PhysicsModel {
     if (ZeroElMass) {
       // Set jpar,Ve,Ajpar neglecting the electron inertia term
       // Calculate Jpar, communicating across processors
-      jpar = -(Ni0 * Grad_par(phi, CELL_YLOW)) / (fmei * 0.51 * nu);
+
+      jpar =
+          -interp_to(Ni0, stagger_loc) * Grad_par(phi, stagger_loc) / (fmei * 0.51 * nu);
 
       if (OhmPe) {
-        jpar += (Te0 * Grad_par(Ni, CELL_YLOW)) / (fmei * 0.51 * nu);
+        jpar +=
+            interp_to(Te0, stagger_loc) * Grad_par(Ni, stagger_loc) / (fmei * 0.51 * nu);
       }
 
       // Need to communicate jpar
diff --git a/examples/tokamak-2fluid/data/BOUT.inp b/examples/tokamak-2fluid/data/BOUT.inp
index 563af66cd6..214bafc19a 100644
--- a/examples/tokamak-2fluid/data/BOUT.inp
+++ b/examples/tokamak-2fluid/data/BOUT.inp
@@ -45,14 +45,6 @@ first = C4
 second = C4
 upwind = U1
 
-##################################################
-# Laplacian inversion settings
-
-[laplace]
-all_terms = false
-nonuniform = true
-filter = 0.2   # Remove the top 20% of modes (BOUT-06 zwindow=0.4)
-
 ##################################################
 # Solver settings
 
@@ -81,8 +73,6 @@ estatic = true     # if true, electrostatic (Apar = 0). (BOUT-06 = esop)
 ZeroElMass = true  # Use Ohms law without electron inertia
 bout_jpar = true   # Use BOUT-06 method to calculate ZeroElMass jpar
 
-bout_exb = true    # Use the BOUT-06 subset of ExB terms
-
 curv_upwind = false # Use upwinding for b0xkappa_dot_Grad terms
 
 laplace_extra_rho_term = false # include Grad_perp(Ni) dot Grad_perp(phi) term
diff --git a/externalpackages/PVODE/include/pvode/nvector.h b/externalpackages/PVODE/include/pvode/nvector.h
index 8232995847..1095bdb474 100644
--- a/externalpackages/PVODE/include/pvode/nvector.h
+++ b/externalpackages/PVODE/include/pvode/nvector.h
@@ -88,12 +88,13 @@ namespace pvode {
 
 
 /* Set types real and integer for MPI calls. */
-
+#ifndef PVEC_REAL_MPI_TYPE
 #if (LLNL_DOUBLE == 1)
 #define PVEC_REAL_MPI_TYPE MPI_DOUBLE
 #else
 #define PVEC_REAL_MPI_TYPE MPI_FLOAT
 #endif
+#endif
 
 // Ugh, potentionanlly bad but need to do this for now when compiling with both SUNDIALS and PVODE
 #ifndef PVEC_INTEGER_MPI_TYPE
diff --git a/externalpackages/PVODE/precon/band.cpp b/externalpackages/PVODE/precon/band.cpp
index c1d0f6f1e2..43151320d0 100644
--- a/externalpackages/PVODE/precon/band.cpp
+++ b/externalpackages/PVODE/precon/band.cpp
@@ -185,8 +185,8 @@ integer gbfa(real **a, integer n, integer mu, integer ml, integer smu,
     if (col_k[storage_l] == ZERO) return(k+1);
     
     /* swap a(l,k) and a(k,k) if necessary */
-    
-    if (swap = (l != k)) {
+    swap = (l != k);
+    if (swap) {
       temp = col_k[storage_l];
       col_k[storage_l] = *diag_k;
       *diag_k = temp;
diff --git a/externalpackages/PVODE/source/cvode.cpp b/externalpackages/PVODE/source/cvode.cpp
index e28a3b1ae6..135662a40f 100644
--- a/externalpackages/PVODE/source/cvode.cpp
+++ b/externalpackages/PVODE/source/cvode.cpp
@@ -179,7 +179,7 @@ namespace pvode {
 
 #define MSG_Y0_NULL     CVM "y0=NULL illegal.\n\n"
 
-#define MSG_BAD_N       CVM "N=%ld < 1 illegal.\n\n"
+#define MSG_BAD_N CVM "N=%d < 1 illegal.\n\n"
 
 #define MSG_BAD_LMM_1   CVM "lmm=%d illegal.\n"
 #define MSG_BAD_LMM_2   "The legal values are ADAMS=%d and BDF=%d.\n\n"
@@ -1826,6 +1826,8 @@ static int CVnls(CVodeMem cv_mem, int nflag)
     case FUNCTIONAL : return(CVnlsFunctional(cv_mem));
     case NEWTON     : return(CVnlsNewton(cv_mem, nflag));
   }
+  fprintf(errfp, "Should be unreachable ...");
+  return (ERR_FAILURE);
 }
 
 /***************** CVnlsFunctional ********************************
@@ -2392,6 +2394,8 @@ static int CVHandleFailure(CVodeMem cv_mem, int kflag)
     case SOLVE_FAILED:  fprintf(errfp, MSG_SOLVE_FAILED, tn);
                         return(SOLVE_FAILURE);
   }
+  fprintf(errfp, "Should be unreachable ...");
+  return (ERR_FAILURE);
 }
 
 /*******************************************************************/
diff --git a/externalpackages/boutdata b/externalpackages/boutdata
index 2d4c5bb4b2..ab59ef9138 160000
--- a/externalpackages/boutdata
+++ b/externalpackages/boutdata
@@ -1 +1 @@
-Subproject commit 2d4c5bb4b2bdc0b74d3d267559a5624aa599b95f
+Subproject commit ab59ef913884918a5bc5ee84101e6d6b833dcd6c
diff --git a/externalpackages/boututils b/externalpackages/boututils
index a79a00a54f..c2e97a226a 160000
--- a/externalpackages/boututils
+++ b/externalpackages/boututils
@@ -1 +1 @@
-Subproject commit a79a00a54f69663117a93dd42ffa6004783432c8
+Subproject commit c2e97a226a8a7728b8f5085792f7744484943512
diff --git a/include/bout/boutcomm.hxx b/include/bout/boutcomm.hxx
index 77038844a8..fea401af02 100644
--- a/include/bout/boutcomm.hxx
+++ b/include/bout/boutcomm.hxx
@@ -40,7 +40,7 @@ public:
   static BoutComm* getInstance();
 
   /// Shortcut method
-  static MPI_Comm get();
+  static MPI_Comm& get();
 
   static void setArgs(int& c, char**& v);
 
@@ -53,7 +53,7 @@ public:
   void setComm(MPI_Comm c);
 
   // Getters
-  MPI_Comm getComm();
+  MPI_Comm& getComm();
   bool isSet();
 
 private:
diff --git a/include/bout/field.hxx b/include/bout/field.hxx
index 51719de82e..be347f60d0 100644
--- a/include/bout/field.hxx
+++ b/include/bout/field.hxx
@@ -345,7 +345,8 @@ inline bool isUniform(const T& f, bool allpe = false,
   auto element = f[*f.getRegion(region).begin()];
   // TODO: maybe parallise this loop, as the early return is unlikely
   BOUT_FOR_SERIAL(i, f.getRegion(region)) {
-    if (f[i] != element) {
+    // by default we only check for exact equality, as that should cover most cases
+    if (f[i] != element and (not almost_equal(f[i], element, 10))) {
       result = false;
       break;
     }
@@ -370,8 +371,12 @@ inline BoutReal getUniform(const T& f, MAYBE_UNUSED(bool allpe) = false,
                            const std::string& region = "RGN_ALL") {
 #if CHECK > 1
   if (not isUniform(f, allpe, region)) {
-    throw BoutException("Requested getUniform({}, {}, {}) but Field is not const", f.name,
-                        allpe, region);
+    const BoutReal f1 = min(f, allpe, region);
+    const BoutReal f2 = max(f, allpe, region);
+    throw BoutException("Requested getUniform({}, {}, {}) but Field is not const "
+                        "([{:.15f}...{:.15f}] Δ={:e} {:e}ε)",
+                        f.name, allpe, region, f1, f2, f2 - f1,
+                        (f2 - f1) / (f1 + f2) / std::numeric_limits<BoutReal>::epsilon());
   }
 #endif
   return f[*f.getRegion(region).begin()];
diff --git a/include/bout/output.hxx b/include/bout/output.hxx
index bb202cc8df..ef09aa7ee5 100644
--- a/include/bout/output.hxx
+++ b/include/bout/output.hxx
@@ -137,11 +137,11 @@ private:
 class DummyOutput : public Output {
 public:
   template <class S, class... Args>
-  void write(const S&, const Args&...){};
+  void write([[maybe_unused]] const S& format, [[maybe_unused]] const Args&... args){};
   template <class S, class... Args>
-  void print(const S&, const Args&...){};
-  void write(const std::string& s) override{};
-  void print(const std::string& s) override{};
+  void print([[maybe_unused]] const S& format, [[maybe_unused]] const Args&... args){};
+  void write([[maybe_unused]] const std::string& message) override{};
+  void print([[maybe_unused]] const std::string& message) override{};
   void enable() override{};
   void disable() override{};
   void enable(MAYBE_UNUSED(bool enable)){};
diff --git a/include/bout/sundials_backports.hxx b/include/bout/sundials_backports.hxx
index 9c96de1935..c5e0f3ab15 100644
--- a/include/bout/sundials_backports.hxx
+++ b/include/bout/sundials_backports.hxx
@@ -39,7 +39,7 @@ inline void SUNNonlinSolFree(MAYBE_UNUSED(SUNNonlinearSolver solver)) {}
 #if SUNDIALS_VERSION_MAJOR < 6
 namespace sundials {
 struct Context {
-  Context(MPI_Comm comm MAYBE_UNUSED() = MPI_COMM_NULL) {}
+  Context(void* comm MAYBE_UNUSED()) {}
 };
 } // namespace sundials
 
diff --git a/include/bout/utils.hxx b/include/bout/utils.hxx
index d221abcbad..450d3eaf87 100644
--- a/include/bout/utils.hxx
+++ b/include/bout/utils.hxx
@@ -661,30 +661,16 @@ std::string trimComments(const std::string& s, const std::string& c = "#;");
 /// https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance
 std::string::size_type editDistance(const std::string& str1, const std::string& str2);
 
-/// the bout_vsnprintf macro:
-/// The first argument is an char * buffer of length len.
-/// It needs to have been allocated with new[], as it may be
-/// reallocated.
-/// len: the length of said buffer. May be changed, mussn't be const.
-/// fmt: the const char * descriping the format.
-/// note that fmt should be the first argument of the function of type
-/// const char * and has to be directly followed by the variable arguments.
-#define bout_vsnprintf(buf, len, fmt)                 \
-  {                                                   \
-    va_list va;                                       \
-    va_start(va, fmt);                                \
-    int _vsnprintflen = vsnprintf(buf, len, fmt, va); \
-    va_end(va);                                       \
-    if (_vsnprintflen + 1 > int(len)) {               \
-      _vsnprintflen += 1;                             \
-      delete[] buf;                                   \
-      buf = new char[_vsnprintflen];                  \
-      len = _vsnprintflen;                            \
-      va_start(va, fmt);                              \
-      vsnprintf(buf, len, fmt, va);                   \
-      va_end(va);                                     \
-    }                                                 \
-  }
+// from https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
+template <class T>
+typename std::enable_if<!std::numeric_limits<T>::is_integer, bool>::type
+almost_equal(T x, T y, int ulp = 2) {
+  // the machine epsilon has to be scaled to the magnitude of the values used
+  // and multiplied by the desired precision in ULPs (units in the last place)
+  return std::fabs(x - y) <= std::numeric_limits<T>::epsilon() * std::fabs(x + y) * ulp
+         // unless the result is subnormal
+         || std::fabs(x - y) < std::numeric_limits<T>::min();
+}
 
 /// Convert pointer or reference to pointer
 /// This allows consistent handling of both in macros, templates
diff --git a/manual/Makefile b/manual/Makefile
index 1e1d674a3f..6da41e4a71 100644
--- a/manual/Makefile
+++ b/manual/Makefile
@@ -1,5 +1,5 @@
 # Makefile for the reference and user manuals
-.PHONY:all clean sphinx html pdf doxygen breathe-autogen
+.PHONY:all clean sphinx html pdf doxygen breathe-autogen maybe-doxygen
 
 BOUT_TOP?=..
 
@@ -14,26 +14,30 @@ manual: all
 pdf: sphinx-pdf
 html: sphinx-html
 man: sphinx-man
-sphinx: sphinx-pdf
+sphinx: sphinx-html
 
-sphinx-pdf: doxygen
+sphinx-pdf: maybe-doxygen
 	PYTHONPATH=$(BOUT_TOP)/tools/pylib:$$PYTHONPATH $(sphinx-build) -b latex sphinx/ build/
 	cd build && latexmk -pdf BOUT -interaction=batchmode
 	test -e BOUT.pdf || ln -s build/BOUT.pdf .
 	@echo "Documentation is available in $$(pwd)/BOUT.pdf"
 
-sphinx-html: doxygen
+sphinx-html: maybe-doxygen
 	PYTHONPATH=$(BOUT_TOP)/tools/pylib:$$PYTHONPATH $(sphinx-build) -b html sphinx/ html/
 	@echo "Documentation available in file://$$(pwd)/html/index.html"
 
-sphinx-man: doxygen
+sphinx-man: maybe-doxygen
 	PYTHONPATH=$(BOUT_TOP)/tools/pylib:$$PYTHONPATH $(sphinx-build) -b man sphinx/ man/
 	@echo "Documentation available in $$(pwd)/man/bout.1"
 
 # Run doxygen, ignore if it fails (leading '-')
-doxygen:
+maybe-doxygen:
 	-cd doxygen && doxygen Doxyfile
 
+
+doxygen:
+	cd doxygen && doxygen Doxyfile
+
 # Run breathe-apidoc, ignore if it fails (leading '-')
 breathe-autogen: doxygen
 	-breathe-apidoc -f -o sphinx/_breathe_autogen doxygen/bout/xml
diff --git a/manual/README.md b/manual/README.md
index 1003f0ed19..9f69c00548 100644
--- a/manual/README.md
+++ b/manual/README.md
@@ -12,30 +12,28 @@ To build the manual locally, you need at least "sphinx" and
 "recommonmark", which you can install using pip (or pip3):
 
 ```bash
-$ pip install --user sphinx
-$ pip install --user recommonmark
+$ pip install -r sphinx/requirements.txt
 ```
 
-These documents can be built into a PDF using "sphinx-build":
+To get a local html version, run
 
 ```bash
-$ make
+$ make html
 ```
 
+This should create a file "index.html" in the "manual/html" directory.
+
 To use e.g. "sphinx-build-3" instead of "sphinx-build", run
 ```bash
 $ make sphinx-build=sphinx-build-3
 ```
 
-This should create a file "BOUT.pdf" in the "manual" directory.
-
-To get a local html version, run
+These documents can be built into a PDF using "sphinx-build":
 
 ```bash
-$ make html
+$ make
 ```
-
-This should create a file "index.html" in the "manual/html" directory.
+This should create a file "BOUT.pdf" in the "manual" directory.
 
 ### API documentation
 
@@ -53,14 +51,7 @@ It is possible to build the API documentation into the main manual
 using "breathe". Install breathe:
 
 ```bash
-$ pip install --user breathe
-```
-
-and comment out the following line in `sphinx/conf.py`:
-
-```python
-# Disable breathe
-has_breathe = False
+$ pip install breathe
 ```
 
 You can then build the sphinx documentation as normal.
diff --git a/manual/sphinx/_apidoc/boutpp.rst b/manual/sphinx/_apidoc/boutpp.rst
index d28d7c7d18..71a092ca17 100644
--- a/manual/sphinx/_apidoc/boutpp.rst
+++ b/manual/sphinx/_apidoc/boutpp.rst
@@ -13,16 +13,4 @@ Module contents
 
 .. automodule:: boutpp
     :members:
-
-Undocumented functions
-----------------------
-
-.. automodule:: boutpp
-   :undoc-members:
-
-Special and inherited functions
--------------------------------
-
-.. automodule:: boutpp
-   :special-members:
-   :inherited-members:
+    :imported-members:
diff --git a/manual/sphinx/conf.py b/manual/sphinx/conf.py
index a643774553..c5a9f4716d 100755
--- a/manual/sphinx/conf.py
+++ b/manual/sphinx/conf.py
@@ -67,6 +67,8 @@ def __getattr__(cls, name):
         "scipy.ndimage.filters",
         "scipy.ndimage.morphology",
         "scipy.spatial",
+        "past",
+        "crosslines",
     ]
     sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
     print(os.environ)
@@ -86,6 +88,7 @@ def __getattr__(cls, name):
         + " -DBOUT_ENABLE_PYTHON=ON"
         + " -DBOUT_UPDATE_GIT_SUBMODULE=OFF"
         + " -DBOUT_TESTS=OFF"
+        + " -DBOUT_ALLOW_INSOURCE_BUILD=ON"
         + f" -DPython_ROOT_DIR={pydir}"
         + f" -Dmpark_variant_DIR={pwd}/externalpackages/mpark.variant/"
         + f" -Dfmt_DIR={pwd}/externalpackages/fmt/"
@@ -174,8 +177,8 @@ def __getattr__(cls, name):
 
 # General information about the project.
 project = "BOUT++"
-copyright = "2017, B. Dudson"
-author = "The BOUT++ team"
+copyright = "2017-2023"
+author = "B. Dudson and The BOUT++ team"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -231,6 +234,7 @@ def __getattr__(cls, name):
 #
 html_theme_options = dict(
     repository_url="https://github.com/boutproject/BOUT-dev",
+    repository_branch="master",
     path_to_docs="manual/sphinx",
     use_edit_page_button=True,
     use_repository_button=True,
diff --git a/manual/sphinx/developer_docs/file_io.rst b/manual/sphinx/developer_docs/file_io.rst
index ad44c97277..aece8cfb1e 100644
--- a/manual/sphinx/developer_docs/file_io.rst
+++ b/manual/sphinx/developer_docs/file_io.rst
@@ -92,7 +92,7 @@ similar interface to ``Datafile``, and just like ``Datafile`` it also
 stores pointers to variables. This means that it still suffers from
 all of the downsides of ``Datafile``, and developers are encouraged to
 move to the ``outputVars`` approach. The `SAVE_ONCE`/`SAVE_REPEAT`
-macros also work through `DataFileFacade`` -- this means that they
+macros also work through ``DataFileFacade`` -- this means that they
 cannot be used outside of ``PhysicsModel`` methods!
 
 
diff --git a/manual/sphinx/user_docs/advanced_install.rst b/manual/sphinx/user_docs/advanced_install.rst
index fddbbcec8b..b2b6b49c80 100644
--- a/manual/sphinx/user_docs/advanced_install.rst
+++ b/manual/sphinx/user_docs/advanced_install.rst
@@ -113,32 +113,38 @@ Cori
 First set up the environment by loading the correct modules. For Bash shell use:
 
 .. code-block:: bash
+
    source config/cori/setup-env-cgpu.sh
 
 and for C shell:
 
 .. code-block:: csh
+
    source config/cori/setup-env-cgpu.sh
 
 Then configure BOUT++ by running a script which calls CMake. Under bash:
 
 .. code-block:: bash
+
    ./config/cori/config-bout-cgpu.sh
 
 and C shell:
 
 .. code-block:: csh
+
    ./config/cori/config-bout-cgpu.csh
 
 At the time of writing, Hypre linking is not working with CUDA. If you come across
 errors with the above configuration, try turning off Hypre support:
 
 .. code-block:: bash
+
    ./config/cori/config-bout-cgpu-nohypre.sh
 
 or
 
 .. code-block:: csh
+
    ./config/cori/config-bout-cgpu-nohypre.csh
 
 See section :ref:`sec-gpusupport` for details of compiling and running
diff --git a/manual/sphinx/user_docs/installing.rst b/manual/sphinx/user_docs/installing.rst
index b09d219543..28c03b6730 100644
--- a/manual/sphinx/user_docs/installing.rst
+++ b/manual/sphinx/user_docs/installing.rst
@@ -443,27 +443,27 @@ configuration::
 If not, see :ref:`sec-advancedinstall` for some things you can try to
 resolve common problems.
 
-Working with an active `conda` environment
+Working with an active ``conda`` environment
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-When `conda` is used, it installs separate versions of several libraries. These
+When ``conda`` is used, it installs separate versions of several libraries. These
 can cause warnings or even failures when linking BOUT++ executables. There are
 several alternatives to deal with this problem:
-* The simplest but least convenient option is to use `conda deactivate` before
+* The simplest but least convenient option is to use ``conda deactivate`` before
   configuring, compiling, or running any BOUT++ program.
 * You might sometimes want to link to the conda-installed libraries. This is
   probably not ideal for production runs on an HPC system (as conda downloads
   binary packages that will not be optimized for specific hardware), but can be
   a simple way to get packages for testing or on a personal computer. In this
-  case just keep your `conda` environment active, and with luck the libraries
+  case just keep your ``conda`` environment active, and with luck the libraries
   should be picked up by the standard search mechanisms.
 * In case you do want a fully optimized and as-stable-as-possible build for
   production runs, it is probably best not to depend on any conda packages for
-  compiling or running BOUT++ executables (restrict `conda` to providing Python
+  compiling or running BOUT++ executables (restrict ``conda`` to providing Python
   packages for post-processing, and their dependencies). Passing
-  `-DBOUT_IGNORE_CONDA_ENV=ON` (default `OFF`) excludes anything in the conda
+  ``-DBOUT_IGNORE_CONDA_ENV=ON`` (default ``OFF``) excludes anything in the conda
   environment from CMake search paths. This should totally separate BOUT++ from
-  the `conda` environment.
+  the ``conda`` environment.
 
 .. _sec-config-nls:
 
diff --git a/manual/sphinx/user_docs/parallel-transforms.rst b/manual/sphinx/user_docs/parallel-transforms.rst
index ad7295bbee..3ee3eccfb8 100644
--- a/manual/sphinx/user_docs/parallel-transforms.rst
+++ b/manual/sphinx/user_docs/parallel-transforms.rst
@@ -70,8 +70,8 @@ setting is
 
 .. code-block:: cfg
 
-   [mesh]
-   paralleltransform = identity
+   [mesh:paralleltransform]
+   type = identity
 
 
 This then uses the `ParallelTransformIdentity` class to calculate the
@@ -100,8 +100,8 @@ The shifted metric method is selected using:
 
 .. code-block:: cfg
 
-   [mesh]
-   paralleltransform = shifted
+   [mesh:paralleltransform]
+   type = shifted
 
 so that mesh uses the `ShiftedMetric` class to calculate parallel
 transforms.  During initialisation, this class reads a quantity zShift
@@ -141,8 +141,8 @@ calculation of parallel slices. Select it by using:
 
 .. code-block:: cfg
 
-   [mesh]
-   paralleltransform = shifted
+   [mesh:paralleltransform]
+   type = shifted
    calcParallelSlices_on_communicate = false
 
 With these settings, inputs to parallel derivative or interpolation
@@ -177,8 +177,8 @@ To use the FCI method for parallel transforms, set
 
 .. code-block:: cfg
 
-   [mesh]
-   paralleltransform = fci
+   [mesh:paralleltransform]
+   type = fci
 
 which causes the `FCITransform` class to be used for parallel
 transforms.  This reads four variables (3D fields) from the input
diff --git a/manual/sphinx/user_docs/time_integration.rst b/manual/sphinx/user_docs/time_integration.rst
index 9d5a34ea89..7d0b56abcc 100644
--- a/manual/sphinx/user_docs/time_integration.rst
+++ b/manual/sphinx/user_docs/time_integration.rst
@@ -151,6 +151,20 @@ for a certain variable (e.g. ``[n]``), setting the option
 ``positivity_constraint`` to one of ``positive``, ``non_negative``,
 ``negative``, or ``non_positive``.
 
+Additional options can be used to modify the behaviour of the linear and
+nonlinear solvers:
+
+- ``cvode_nonlinear_convergence_coef`` specifies the safety factor
+  used in the nonlinear convergence test. Passed as a parameter to
+  `CVodeSetNonlinConvCoef
+  <https://sundials.readthedocs.io/en/latest/cvodes/Usage/SIM.html#c.CVodeSetNonlinConvCoef>`_.
+
+- ``cvode_linear_convergence_coef`` specifies the factor by which the
+  Krylov linear solver’s convergence test constant is reduced from the
+  nonlinear solver test constant. Passed as a parameter to
+  `CVodeSetEpsLin
+  <https://sundials.readthedocs.io/en/latest/cvodes/Usage/SIM.html#c.CVodeSetEpsLin>`_.
+
 IMEX-BDF2
 ---------
 
@@ -358,7 +372,10 @@ iterations within a given range.
 +---------------------------+---------------+----------------------------------------------------+
 | ksp_type                  | gmres         | PETSc KSP linear solver                            |
 +---------------------------+---------------+----------------------------------------------------+
-| pc_type                   | ilu / bjacobi | PETSc PC preconditioner                            |
+| pc_type                   | ilu / bjacobi | PETSc PC preconditioner (try hypre in parallel)    |
++---------------------------+---------------+----------------------------------------------------+
+| pc_hypre_type             | pilut         | If ``pc_type = hypre``.                            |
+|                           |               | Hypre preconditioner type: euclid, boomeramg       |
 +---------------------------+---------------+----------------------------------------------------+
 | max_nonlinear_iterations  | 20            | If exceeded, solve restarts with timestep / 2      |
 +---------------------------+---------------+----------------------------------------------------+
diff --git a/src/bout++.cxx b/src/bout++.cxx
index 79a6738f83..7d7b38b947 100644
--- a/src/bout++.cxx
+++ b/src/bout++.cxx
@@ -822,7 +822,7 @@ BoutMonitor::BoutMonitor(BoutReal timestep, Options& options)
                           .doc(_("Name of file whose existence triggers a stop"))
                           .withDefault("BOUT.stop"))) {}
 
-int BoutMonitor::call(Solver* solver, BoutReal t, int iter, int NOUT) {
+int BoutMonitor::call(Solver* solver, BoutReal t, MAYBE_UNUSED(int iter), int NOUT) {
   TRACE("BoutMonitor::call({:e}, {:d}, {:d})", t, iter, NOUT);
 
   // Increment Solver's iteration counter, and set the global `iteration`
diff --git a/src/invert/laplace/impls/petsc/petsc_laplace.cxx b/src/invert/laplace/impls/petsc/petsc_laplace.cxx
index 107f3912d9..1df560f288 100644
--- a/src/invert/laplace/impls/petsc/petsc_laplace.cxx
+++ b/src/invert/laplace/impls/petsc/petsc_laplace.cxx
@@ -148,8 +148,6 @@ LaplacePetsc::LaplacePetsc(Options* opt, const CELL_LOC loc, Mesh* mesh_in,
   MatCreate(comm, &MatA);
   MatSetSizes(MatA, localN, localN, size, size);
   MatSetFromOptions(MatA);
-  //   if (fourth_order) MatMPIAIJSetPreallocation( MatA, 25, PETSC_NULL, 10, PETSC_NULL );
-  //   else MatMPIAIJSetPreallocation( MatA, 9, PETSC_NULL, 3, PETSC_NULL );
 
   /* Pre allocate memory
    * nnz denotes an array containing the number of non-zeros in the various rows
@@ -854,9 +852,11 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) {
   KSPGetConvergedReason(ksp, &reason);
   if (reason == -3) { // Too many iterations, might be fixed by taking smaller timestep
     throw BoutIterationFail("petsc_laplace: too many iterations");
-  } else if (reason <= 0) {
-    output << "KSPConvergedReason is " << reason << endl;
-    throw BoutException("petsc_laplace: inversion failed to converge.");
+  }
+  if (reason <= 0) {
+    throw BoutException(
+        "petsc_laplace: inversion failed to converge. KSPConvergedReason: {} ({})",
+        KSPConvergedReasons[reason], reason);
   }
 
   // Add data to FieldPerp Object
diff --git a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx
index 460bec865b..24d61af783 100644
--- a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx
+++ b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx
@@ -265,8 +265,9 @@ Field3D LaplacePetsc3dAmg::solve(const Field3D& b_in, const Field3D& x0) {
     throw BoutIterationFail("Petsc3dAmg: too many iterations");
   }
   if (reason <= 0) {
-    output << "KSPConvergedReason is " << reason << "\n";
-    throw BoutException("Petsc3dAmg: inversion failed to converge.");
+    throw BoutException(
+        "Petsc3dAmg: inversion failed to converge. KSPConvergedReason: {} ({})",
+        KSPConvergedReasons[reason], reason);
   }
 
   // Create field from result
@@ -511,7 +512,9 @@ void LaplacePetsc3dAmg::updateMatrix3D() {
 
     // Set the relative and absolute tolerances
     PCSetType(pc, pctype.c_str());
+#if PETSC_VERSION_LT(3, 18, 0)
     PCGAMGSetSymGraph(pc, PETSC_TRUE);
+#endif
   }
   lib.setOptionsFromInputFile(ksp);
 
diff --git a/src/invert/pardiv/impls/cyclic/pardiv_cyclic.cxx b/src/invert/pardiv/impls/cyclic/pardiv_cyclic.cxx
index 32cfea8306..c17e5af64d 100644
--- a/src/invert/pardiv/impls/cyclic/pardiv_cyclic.cxx
+++ b/src/invert/pardiv/impls/cyclic/pardiv_cyclic.cxx
@@ -40,16 +40,15 @@
 
 #if not BOUT_USE_METRIC_3D
 
+#include <bout/boutexception.hxx>
 #include <bout/constants.hxx>
-#include <boutexception.hxx>
-#include <cyclic_reduction.hxx>
-#include <derivs.hxx>
-#include <fft.hxx>
-#include <globals.hxx>
-#include <msg_stack.hxx>
-#include <utils.hxx>
-
+#include <bout/cyclic_reduction.hxx>
+#include <bout/derivs.hxx>
+#include <bout/fft.hxx>
+#include <bout/globals.hxx>
+#include <bout/msg_stack.hxx>
 #include <bout/surfaceiter.hxx>
+#include <bout/utils.hxx>
 
 #include <cmath>
 
diff --git a/src/invert/pardiv/impls/cyclic/pardiv_cyclic.hxx b/src/invert/pardiv/impls/cyclic/pardiv_cyclic.hxx
index 2ce4f1acc9..fdcdab055e 100644
--- a/src/invert/pardiv/impls/cyclic/pardiv_cyclic.hxx
+++ b/src/invert/pardiv/impls/cyclic/pardiv_cyclic.hxx
@@ -52,9 +52,9 @@ RegisterUnavailableInvertParDiv registerinvertpardivcyclic{
 
 #else
 
-#include "dcomplex.hxx"
-#include "utils.hxx"
-#include <globals.hxx>
+#include <bout/dcomplex.hxx>
+#include <bout/globals.hxx>
+#include <bout/utils.hxx>
 
 class InvertParDivCR : public InvertParDiv {
 public:
diff --git a/src/physics/physicsmodel.cxx b/src/physics/physicsmodel.cxx
index f0fbffb0fb..cac4bda5cc 100644
--- a/src/physics/physicsmodel.cxx
+++ b/src/physics/physicsmodel.cxx
@@ -99,7 +99,6 @@ void PhysicsModel::initialise(Solver* s) {
   solver = s;
 
   bout::experimental::addBuildFlagsToOptions(output_options);
-  mesh->outputVars(output_options);
 
   // Restart option
   const bool restarting = Options::root()["restart"].withDefault(false);
@@ -113,6 +112,8 @@ void PhysicsModel::initialise(Solver* s) {
     throw BoutException("Couldn't initialise physics model");
   }
 
+  mesh->outputVars(output_options);
+
   // Post-initialise, which reads restart files
   // This function can be overridden by the user
   if (postInit(restarting) != 0) {
diff --git a/src/solver/impls/arkode/arkode.cxx b/src/solver/impls/arkode/arkode.cxx
index 09aa5b9ea2..13e0dd817e 100644
--- a/src/solver/impls/arkode/arkode.cxx
+++ b/src/solver/impls/arkode/arkode.cxx
@@ -226,7 +226,7 @@ ArkodeSolver::ArkodeSolver(Options* opts)
                        .withDefault(false)),
       optimize(
           (*options)["optimize"].doc("Use ARKode optimal parameters").withDefault(false)),
-      suncontext(MPI_COMM_WORLD) {
+      suncontext(static_cast<void*>(&BoutComm::get())) {
   has_constraints = false; // This solver doesn't have constraints
 
   // Add diagnostics to output
diff --git a/src/solver/impls/cvode/cvode.cxx b/src/solver/impls/cvode/cvode.cxx
index 4701d02f69..70eb3b8841 100644
--- a/src/solver/impls/cvode/cvode.cxx
+++ b/src/solver/impls/cvode/cvode.cxx
@@ -174,7 +174,16 @@ CvodeSolver::CvodeSolver(Options* opts)
                     .doc("Use right preconditioner? Otherwise use left.")
                     .withDefault(false)),
       use_jacobian((*options)["use_jacobian"].withDefault(false)),
-      suncontext(MPI_COMM_WORLD) {
+      cvode_nonlinear_convergence_coef(
+          (*options)["cvode_nonlinear_convergence_coef"]
+              .doc("Safety factor used in the nonlinear convergence test")
+              .withDefault(0.1)),
+      cvode_linear_convergence_coef(
+          (*options)["cvode_linear_convergence_coef"]
+              .doc("Factor by which the Krylov linear solver’s convergence test constant "
+                   "is reduced from the nonlinear solver test constant.")
+              .withDefault(0.05)),
+      suncontext(static_cast<void*>(&BoutComm::get())) {
   has_constraints = false; // This solver doesn't have constraints
   canReset = true;
 
@@ -458,6 +467,10 @@ int CvodeSolver::init() {
 #endif
   }
 
+  // Set internal tolerance factors
+  CVodeSetNonlinConvCoef(cvode_mem, cvode_nonlinear_convergence_coef);
+  CVodeSetEpsLin(cvode_mem, cvode_linear_convergence_coef);
+
   cvode_initialised = true;
 
   return 0;
diff --git a/src/solver/impls/cvode/cvode.hxx b/src/solver/impls/cvode/cvode.hxx
index b2ed83568b..fa8b972bca 100644
--- a/src/solver/impls/cvode/cvode.hxx
+++ b/src/solver/impls/cvode/cvode.hxx
@@ -123,6 +123,8 @@ private:
   /// Use right preconditioner? Otherwise use left.
   bool rightprec;
   bool use_jacobian;
+  BoutReal cvode_nonlinear_convergence_coef;
+  BoutReal cvode_linear_convergence_coef;
 
   // Diagnostics from CVODE
   int nsteps{0};
diff --git a/src/solver/impls/ida/ida.cxx b/src/solver/impls/ida/ida.cxx
index cf057ac74c..b008ebf903 100644
--- a/src/solver/impls/ida/ida.cxx
+++ b/src/solver/impls/ida/ida.cxx
@@ -101,7 +101,7 @@ IdaSolver::IdaSolver(Options* opts)
       correct_start((*options)["correct_start"]
                         .doc("Correct the initial values")
                         .withDefault(true)),
-      suncontext(MPI_COMM_WORLD) {
+      suncontext(static_cast<void*>(&BoutComm::get())) {
   has_constraints = true; // This solver has constraints
 }
 
diff --git a/src/solver/impls/imex-bdf2/imex-bdf2.cxx b/src/solver/impls/imex-bdf2/imex-bdf2.cxx
index aae92ad27d..425c3e0073 100644
--- a/src/solver/impls/imex-bdf2/imex-bdf2.cxx
+++ b/src/solver/impls/imex-bdf2/imex-bdf2.cxx
@@ -676,9 +676,9 @@ void IMEXBDF2::constructSNES(SNES* snesIn) {
           BoutComm::get(), nlocal, nlocal,  // Local sizes
           PETSC_DETERMINE, PETSC_DETERMINE, // Global sizes
           3, // Number of nonzero entries in diagonal portion of local submatrix
-          PETSC_NULL,
+          nullptr,
           0, // Number of nonzeros per row in off-diagonal portion of local submatrix
-          PETSC_NULL, &Jmf);
+          nullptr, &Jmf);
 
 #if PETSC_VERSION_GE(3, 4, 0)
       SNESSetJacobian(*snesIn, Jmf, Jmf, SNESComputeJacobianDefault, this);
diff --git a/src/solver/impls/petsc/petsc.cxx b/src/solver/impls/petsc/petsc.cxx
index ad8e99dd38..1b81ca36b6 100644
--- a/src/solver/impls/petsc/petsc.cxx
+++ b/src/solver/impls/petsc/petsc.cxx
@@ -261,11 +261,10 @@ int PetscSolver::init() {
   CHKERRQ(ierr);
 
 #if PETSC_VERSION_GE(3, 7, 0)
-  ierr = PetscOptionsGetBool(PETSC_NULL, PETSC_NULL, "-interpolate", &interpolate,
-                             PETSC_NULL);
+  ierr = PetscOptionsGetBool(nullptr, nullptr, "-interpolate", &interpolate, nullptr);
   CHKERRQ(ierr);
 #else
-  ierr = PetscOptionsGetBool(PETSC_NULL, "-interpolate", &interpolate, PETSC_NULL);
+  ierr = PetscOptionsGetBool(nullptr, "-interpolate", &interpolate, nullptr);
   CHKERRQ(ierr);
 #endif
 
@@ -273,21 +272,21 @@ int PetscSolver::init() {
   // run, if they didn't then use the standard monitor function. TODO:
   // use PetscFList
 #if PETSC_VERSION_GE(3, 7, 0)
-  ierr = PetscOptionsGetString(PETSC_NULL, PETSC_NULL, "-output_name", this->output_name,
+  ierr = PetscOptionsGetString(nullptr, nullptr, "-output_name", this->output_name,
                                sizeof this->output_name, &output_flag);
   CHKERRQ(ierr);
 #else
-  ierr = PetscOptionsGetString(PETSC_NULL, "-output_name", this->output_name,
+  ierr = PetscOptionsGetString(nullptr, "-output_name", this->output_name,
                                sizeof this->output_name, &output_flag);
   CHKERRQ(ierr);
 #endif
 
   // If the output_name is not specified then use the standard monitor function
-  if (output_flag) {
-    ierr = SNESMonitorSet(snes, PetscSNESMonitor, this, PETSC_NULL);
+  if (output_flag != 0U) {
+    ierr = SNESMonitorSet(snes, PetscSNESMonitor, this, nullptr);
     CHKERRQ(ierr);
   } else {
-    ierr = TSMonitorSet(ts, PetscMonitor, this, PETSC_NULL);
+    ierr = TSMonitorSet(ts, PetscMonitor, this, nullptr);
     CHKERRQ(ierr);
   }
 
@@ -399,11 +398,11 @@ int PetscSolver::init() {
   // Create Jacobian matrix to be used by preconditioner
   output_info << " Get Jacobian matrix at simtime " << simtime << "\n";
 #if PETSC_VERSION_GE(3, 7, 0)
-  ierr = PetscOptionsGetString(PETSC_NULL, PETSC_NULL, "-J_load", load_file,
+  ierr = PetscOptionsGetString(nullptr, nullptr, "-J_load", load_file,
                                PETSC_MAX_PATH_LEN - 1, &J_load);
   CHKERRQ(ierr);
 #else
-  ierr = PetscOptionsGetString(PETSC_NULL, "-J_load", load_file, PETSC_MAX_PATH_LEN - 1,
+  ierr = PetscOptionsGetString(nullptr, "-J_load", load_file, PETSC_MAX_PATH_LEN - 1,
                                &J_load);
   CHKERRQ(ierr);
 #endif
@@ -451,26 +450,26 @@ int PetscSolver::init() {
 
     // Get nonzero pattern of J - color_none !!!
     prealloc = cols * dof * dof;
-    ierr = MatSeqAIJSetPreallocation(J, prealloc, PETSC_NULL);
+    ierr = MatSeqAIJSetPreallocation(J, prealloc, nullptr);
     CHKERRQ(ierr);
-    ierr = MatMPIAIJSetPreallocation(J, prealloc, PETSC_NULL, prealloc, PETSC_NULL);
+    ierr = MatMPIAIJSetPreallocation(J, prealloc, nullptr, prealloc, nullptr);
     CHKERRQ(ierr);
 
-    prealloc =
-        cols; // why nonzeros=295900, allocated nonzeros=2816000/12800000 (*dof*dof), number of mallocs used during MatSetValues calls =256?
-    ierr = MatSeqBAIJSetPreallocation(J, dof, prealloc, PETSC_NULL);
+    // why nonzeros=295900, allocated nonzeros=2816000/12800000 (*dof*dof), number of mallocs used during MatSetValues calls =256?
+    prealloc = cols;
+    ierr = MatSeqBAIJSetPreallocation(J, dof, prealloc, nullptr);
     CHKERRQ(ierr);
-    ierr = MatMPIBAIJSetPreallocation(J, dof, prealloc, PETSC_NULL, prealloc, PETSC_NULL);
+    ierr = MatMPIBAIJSetPreallocation(J, dof, prealloc, nullptr, prealloc, nullptr);
     CHKERRQ(ierr);
 #if PETSC_VERSION_GE(3, 7, 0)
-    ierr = PetscOptionsHasName(PETSC_NULL, PETSC_NULL, "-J_slowfd", &J_slowfd);
+    ierr = PetscOptionsHasName(nullptr, nullptr, "-J_slowfd", &J_slowfd);
     CHKERRQ(ierr);
 #else
-    ierr = PetscOptionsHasName(PETSC_NULL, "-J_slowfd", &J_slowfd);
+    ierr = PetscOptionsHasName(nullptr, "-J_slowfd", &J_slowfd);
     CHKERRQ(ierr);
 #endif
-    if (J_slowfd) { // create Jacobian matrix by slow fd
-      ierr = SNESSetJacobian(snes, J, J, SNESComputeJacobianDefault, PETSC_NULL);
+    if (J_slowfd != 0U) { // create Jacobian matrix by slow fd
+      ierr = SNESSetJacobian(snes, J, J, SNESComputeJacobianDefault, nullptr);
       CHKERRQ(ierr);
       output_info << "SNESComputeJacobian J by slow fd...\n";
 
@@ -524,15 +523,15 @@ int PetscSolver::init() {
 
   // Write J in binary for study - see ~petsc/src/mat/examples/tests/ex124.c
 #if PETSC_VERSION_GE(3, 7, 0)
-  ierr = PetscOptionsHasName(PETSC_NULL, PETSC_NULL, "-J_write", &J_write);
+  ierr = PetscOptionsHasName(nullptr, nullptr, "-J_write", &J_write);
   CHKERRQ(ierr);
 #else
-  ierr = PetscOptionsHasName(PETSC_NULL, "-J_write", &J_write);
+  ierr = PetscOptionsHasName(nullptr, "-J_write", &J_write);
   CHKERRQ(ierr);
 #endif
-  if (J_write) {
-    PetscViewer viewer;
-    output_info << "\n[" << rank << "] Test TSComputeRHSJacobian() ...\n";
+  if (J_write != 0U) {
+    PetscViewer viewer = nullptr;
+    output_info.write("\n[{:d}] Test TSComputeRHSJacobian() ...\n", rank);
 #if PETSC_VERSION_GE(3, 5, 0)
     ierr = TSComputeRHSJacobian(ts, simtime, u, J, J);
     CHKERRQ(ierr);
@@ -542,22 +541,14 @@ int PetscSolver::init() {
     CHKERRQ(ierr);
 #endif
 
-    ierr = PetscSynchronizedPrintf(comm, "[{:d}] TSComputeRHSJacobian is done\n", rank);
-    CHKERRQ(ierr);
+    output.write("[{:d}] TSComputeRHSJacobian is done\n", rank);
 
-#if PETSC_VERSION_GE(3, 5, 0)
-    ierr = PetscSynchronizedFlush(comm, PETSC_STDOUT);
-    CHKERRQ(ierr);
-#else
-    ierr = PetscSynchronizedFlush(comm);
-    CHKERRQ(ierr);
-#endif
-    if (J_slowfd) {
-      output_info << "[" << rank << "] writing J in binary to data/Jrhs_dense.dat...\n";
+    if (J_slowfd != 0U) {
+      output_info.write("[{:d}] writing J in binary to data/Jrhs_dense.dat...\n", rank);
       ierr = PetscViewerBinaryOpen(comm, "data/Jrhs_dense.dat", FILE_MODE_WRITE, &viewer);
       CHKERRQ(ierr);
     } else {
-      output_info << "[" << rank << "] writing J in binary to data/Jrhs_sparse.dat...\n";
+      output_info.write("[{:d}] writing J in binary to data/Jrhs_sparse.dat...\n", rank);
       ierr =
           PetscViewerBinaryOpen(comm, "data/Jrhs_sparse.dat", FILE_MODE_WRITE, &viewer);
       CHKERRQ(ierr);
@@ -579,8 +570,6 @@ int PetscSolver::init() {
  **************************************************************************/
 
 PetscErrorCode PetscSolver::run() {
-  PetscErrorCode ierr;
-  FILE* fp = nullptr;
 
   // Set when the next call to monitor is desired
   next_output = simtime + getOutputTimestep();
@@ -592,23 +581,18 @@ PetscErrorCode PetscSolver::run() {
     bout_snes_time = bout::globals::mpi->MPI_Wtime();
   }
 
-  ierr = TSSolve(ts, u);
-  CHKERRQ(ierr);
+  CHKERRQ(TSSolve(ts, u));
 
   // Gawd, everything is a hack
-  if (this->output_flag) {
-    ierr = PetscFOpen(PETSC_COMM_WORLD, this->output_name, "w", &fp);
-    CHKERRQ(ierr);
-    ierr = PetscFPrintf(PETSC_COMM_WORLD, fp,
-                        "SNES Iteration, KSP Iterations, Wall Time, Norm\n");
-    CHKERRQ(ierr);
+  if ((this->output_flag != 0U) and (BoutComm::rank() == 0)) {
+    Output petsc_info(output_name);
+    // Don't write to stdout
+    petsc_info.disable();
+    petsc_info.write("SNES Iteration, KSP Iterations, Wall Time, Norm\n");
     for (const auto& info : snes_list) {
-      ierr = PetscFPrintf(PETSC_COMM_WORLD, fp, "{:d}, {:d}, {:e}, {:e}\n", info.it,
-                          info.linear_its, info.time, info.norm);
-      CHKERRQ(ierr);
+      petsc_info.write("{:d}, {:d}, {:e}, {:e}\n", info.it, info.linear_its, info.time,
+                       info.norm);
     }
-    ierr = PetscFClose(PETSC_COMM_WORLD, fp);
-    CHKERRQ(ierr);
   }
 
   PetscFunctionReturn(0);
@@ -876,7 +860,7 @@ PetscErrorCode PhysicsSNESApply(SNES snes, Vec x) {
   Mat A, B;
 
   PetscFunctionBegin;
-  ierr = SNESGetJacobian(snes, &A, &B, PETSC_NULL, PETSC_NULL);
+  ierr = SNESGetJacobian(snes, &A, &B, nullptr, nullptr);
   CHKERRQ(ierr);
 #if PETSC_VERSION_GE(3, 5, 0)
   ierr = SNESComputeJacobian(snes, x, A, B);
@@ -890,7 +874,7 @@ PetscErrorCode PhysicsSNESApply(SNES snes, Vec x) {
   CHKERRQ(ierr);
   ierr = KSPGetPC(ksp, &pc);
   CHKERRQ(ierr);
-  ierr = SNESGetFunction(snes, &F, PETSC_NULL, PETSC_NULL);
+  ierr = SNESGetFunction(snes, &F, nullptr, nullptr);
   CHKERRQ(ierr);
   ierr = SNESComputeFunction(snes, x, F);
   CHKERRQ(ierr);
@@ -913,7 +897,7 @@ PetscErrorCode PhysicsSNESApply(SNES snes, Vec x) {
               << ", F \\cdot Fout " << dot << "  ";
 #if PETSC_VERSION_GE(3, 5, 0)
   Vec func;
-  ierr = SNESGetFunction(snes, &func, PETSC_NULL, PETSC_NULL);
+  ierr = SNESGetFunction(snes, &func, nullptr, nullptr);
   CHKERRQ(ierr);
   ierr = VecNorm(func, NORM_2, &fnorm);
   CHKERRQ(ierr);
@@ -962,7 +946,7 @@ PetscErrorCode PetscMonitor(TS ts, PetscInt UNUSED(step), PetscReal t, Vec X, vo
   ierr = TSGetMaxTime(ts, &tfinal);
   CHKERRQ(ierr);
 #else
-  ierr = TSGetDuration(ts, PETSC_NULL, &tfinal);
+  ierr = TSGetDuration(ts, nullptr, &tfinal);
   CHKERRQ(ierr);
 #endif
 
diff --git a/src/solver/impls/slepc/slepc.cxx b/src/solver/impls/slepc/slepc.cxx
index 3f6b7a91bf..f90afe717e 100644
--- a/src/solver/impls/slepc/slepc.cxx
+++ b/src/solver/impls/slepc/slepc.cxx
@@ -162,11 +162,13 @@ SlepcSolver::SlepcSolver(Options* options) {
              .withDefault(0);
 
   tol = options_ref["tol"].doc("SLEPc tolerance").withDefault(1.0e-6);
-  maxIt = options_ref["maxIt"].doc("Maximum iterations").withDefault(PETSC_DEFAULT);
+  maxIt = options_ref["maxIt"]
+              .doc("Maximum iterations")
+              .withDefault(static_cast<int>(PETSC_DEFAULT));
 
   mpd = options_ref["mpd"]
             .doc("Maximum dimension allowed for the projected problem")
-            .withDefault(PETSC_DEFAULT);
+            .withDefault(static_cast<int>(PETSC_DEFAULT));
 
   ddtMode = options_ref["ddtMode"].withDefault(true);
 
diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx
index 69280bd6c2..dfd17a0189 100644
--- a/src/solver/impls/snes/snes.cxx
+++ b/src/solver/impls/snes/snes.cxx
@@ -57,6 +57,35 @@ static PetscErrorCode snesPCapply(PC pc, Vec x, Vec y) {
   PetscFunctionReturn(s->precon(x, y));
 }
 
+///
+/// Input Parameters:
+///   snes - nonlinear solver object
+///   x1 - location at which to evaluate Jacobian
+///   ctx - MatFDColoring context or NULL
+///
+/// Output Parameters:
+///   J - Jacobian matrix (not altered in this routine)
+///   B - newly computed Jacobian matrix to use with preconditioner (generally the same as
+///   J)
+PetscErrorCode SNESComputeJacobianScaledColor(SNES snes, Vec x1, Mat J, Mat B,
+                                              void* ctx) {
+  PetscErrorCode err = SNESComputeJacobianDefaultColor(snes, x1, J, B, ctx);
+  CHKERRQ(err);
+
+  if ((err != 0) or (ctx == nullptr)) {
+    return err;
+  }
+
+  // Get the the SNESSolver pointer from the function call context
+  SNESSolver* fctx;
+  err = MatFDColoringGetFunction(static_cast<MatFDColoring>(ctx), nullptr,
+                                 reinterpret_cast<void**>(&fctx));
+  CHKERRQ(err);
+
+  // Call the SNESSolver function
+  return fctx->scaleJacobian(B);
+}
+
 SNESSolver::SNESSolver(Options* opts)
     : Solver(opts),
       timestep(
@@ -108,6 +137,10 @@ SNESSolver::SNESSolver(Options* opts)
           (*options)["pc_type"]
               .doc("Preconditioner type. By default lets PETSc decide (ilu or bjacobi)")
               .withDefault("default")),
+      pc_hypre_type((*options)["pc_hypre_type"]
+                        .doc("hypre preconditioner type: euclid, pilut, parasails, "
+                             "boomeramg, ams, ads")
+                        .withDefault("pilut")),
       line_search_type((*options)["line_search_type"]
                            .doc("Line search type: basic, bt, l2, cp, nleqerr")
                            .withDefault("default")),
@@ -119,7 +152,10 @@ SNESSolver::SNESSolver(Options* opts)
                        .withDefault(50)),
       use_coloring((*options)["use_coloring"]
                        .doc("Use matrix coloring to calculate Jacobian?")
-                       .withDefault<bool>(true)) {}
+                       .withDefault<bool>(true)),
+      scale_rhs((*options)["scale_rhs"]
+                    .doc("Scale time derivatives?")
+                    .withDefault<bool>(false)) {}
 
 int SNESSolver::init() {
 
@@ -146,6 +182,7 @@ int SNESSolver::init() {
   int ierr;
 
   // Vectors
+  output_info.write("Creating vector\n");
   ierr = VecCreate(BoutComm::get(), &snes_x);
   CHKERRQ(ierr);
   ierr = VecSetSizes(snes_x, nlocal, PETSC_DECIDE);
@@ -158,15 +195,30 @@ int SNESSolver::init() {
 
   if (equation_form == BoutSnesEquationForm::rearranged_backward_euler) {
     // Need an intermediate vector for rearranged Backward Euler
-    VecDuplicate(snes_x, &delta_x);
+    ierr = VecDuplicate(snes_x, &delta_x);
+    CHKERRQ(ierr);
   }
 
   if (predictor) {
     // Storage for previous solution
-    VecDuplicate(snes_x, &x1);
+    ierr = VecDuplicate(snes_x, &x1);
+    CHKERRQ(ierr);
+  }
+
+  if (scale_rhs) {
+    // Storage for rhs factors, one per evolving variable
+    ierr = VecDuplicate(snes_x, &rhs_scaling_factors);
+    CHKERRQ(ierr);
+    // Set all factors to 1 to start with
+    ierr = VecSet(rhs_scaling_factors, 1.0);
+    CHKERRQ(ierr);
+    // Array to store inverse Jacobian row norms
+    ierr = VecDuplicate(snes_x, &jac_row_inv_norms);
+    CHKERRQ(ierr);
   }
 
   // Nonlinear solver interface (SNES)
+  output_info.write("Create SNES\n");
   SNESCreate(BoutComm::get(), &snes);
 
   // Set the callback function
@@ -221,23 +273,38 @@ int SNESSolver::init() {
 
       output_progress.write("Setting Jacobian matrix sizes\n");
 
-      int localN = getLocalN(); // Number of rows on this processor
       int n2d = f2d.size();
       int n3d = f3d.size();
 
-      // Set size of Matrix on each processor to localN x localN
+      // Set size of Matrix on each processor to nlocal x nlocal
       MatCreate(BoutComm::get(), &Jmf);
-      MatSetSizes(Jmf, localN, localN, PETSC_DETERMINE, PETSC_DETERMINE);
+      MatSetSizes(Jmf, nlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE);
       MatSetFromOptions(Jmf);
 
-      std::vector<PetscInt> d_nnz(localN);
-      std::vector<PetscInt> o_nnz(localN);
+      std::vector<PetscInt> d_nnz(nlocal);
+      std::vector<PetscInt> o_nnz(nlocal);
 
       // Set values for most points
+      const int ncells_x = (mesh->LocalNx > 1) ? 2 : 0;
+      const int ncells_y = (mesh->LocalNy > 1) ? 2 : 0;
+      const int ncells_z = (mesh->LocalNz > 1) ? 2 : 0;
+
+      const auto star_pattern = (1 + ncells_x + ncells_y) * (n3d + n2d) + ncells_z * n3d;
+
+      // Offsets. Start with the central cell
+      std::vector<std::pair<int, int>> xyoffsets{{0, 0}};
+      if (ncells_x != 0) {
+        // Stencil includes points in X
+        xyoffsets.push_back({-1, 0});
+        xyoffsets.push_back({1, 0});
+      }
+      if (ncells_y != 0) {
+        // Stencil includes points in Y
+        xyoffsets.push_back({0, -1});
+        xyoffsets.push_back({0, 1});
+      }
 
-      const auto star_pattern_2d = 5 * (n3d + n2d);
-      const auto star_pattern_3d = 7 * n3d + 5 * n2d;
-      const auto star_pattern = (mesh->LocalNz > 1) ? star_pattern_3d : star_pattern_2d;
+      output_info.write("Star pattern: {} non-zero entries\n", star_pattern);
       for (int i = 0; i < localN; i++) {
         // Non-zero elements on this processor
         d_nnz[i] = star_pattern;
@@ -246,148 +313,154 @@ int SNESSolver::init() {
       }
 
       // X boundaries
-      if (mesh->firstX()) {
-        // Lower X boundary
-        for (int y = mesh->ystart; y <= mesh->yend; y++) {
-          for (int z = 0; z < mesh->LocalNz; z++) {
-            int localIndex = ROUND(index(mesh->xstart, y, z));
-            ASSERT2((localIndex >= 0) && (localIndex < localN));
-            const int num_fields = (z == 0) ? n2d + n3d : n3d;
-            for (int i = 0; i < num_fields; i++) {
-              d_nnz[localIndex + i] -= (n3d + n2d);
+      if (ncells_x != 0) {
+        if (mesh->firstX()) {
+          // Lower X boundary
+          for (int y = mesh->ystart; y <= mesh->yend; y++) {
+            for (int z = 0; z < mesh->LocalNz; z++) {
+              int localIndex = ROUND(index(mesh->xstart, y, z));
+              ASSERT2((localIndex >= 0) && (localIndex < localN));
+              const int num_fields = (z == 0) ? n2d + n3d : n3d;
+              for (int i = 0; i < num_fields; i++) {
+                d_nnz[localIndex + i] -= (n3d + n2d);
+              }
             }
           }
-        }
-      } else {
-        // On another processor
-        for (int y = mesh->ystart; y <= mesh->yend; y++) {
-          for (int z = 0; z < mesh->LocalNz; z++) {
-            int localIndex = ROUND(index(mesh->xstart, y, z));
-            ASSERT2((localIndex >= 0) && (localIndex < localN));
-            const int num_fields = (z == 0) ? n2d + n3d : n3d;
-            for (int i = 0; i < num_fields; i++) {
-              d_nnz[localIndex + i] -= (n3d + n2d);
-              o_nnz[localIndex + i] += (n3d + n2d);
+        } else {
+          // On another processor
+          for (int y = mesh->ystart; y <= mesh->yend; y++) {
+            for (int z = 0; z < mesh->LocalNz; z++) {
+              int localIndex = ROUND(index(mesh->xstart, y, z));
+              ASSERT2((localIndex >= 0) && (localIndex < localN));
+              const int num_fields = (z == 0) ? n2d + n3d : n3d;
+              for (int i = 0; i < num_fields; i++) {
+                d_nnz[localIndex + i] -= (n3d + n2d);
+                o_nnz[localIndex + i] += (n3d + n2d);
+              }
             }
           }
         }
-      }
-
-      if (mesh->lastX()) {
-        // Upper X boundary
-        for (int y = mesh->ystart; y <= mesh->yend; y++) {
-          for (int z = 0; z < mesh->LocalNz; z++) {
-            int localIndex = ROUND(index(mesh->xend, y, z));
-            ASSERT2((localIndex >= 0) && (localIndex < localN));
-            const int num_fields = (z == 0) ? n2d + n3d : n3d;
-            for (int i = 0; i < num_fields; i++) {
-              d_nnz[localIndex + i] -= (n3d + n2d);
+        if (mesh->lastX()) {
+          // Upper X boundary
+          for (int y = mesh->ystart; y <= mesh->yend; y++) {
+            for (int z = 0; z < mesh->LocalNz; z++) {
+              int localIndex = ROUND(index(mesh->xend, y, z));
+              ASSERT2((localIndex >= 0) && (localIndex < localN));
+              const int num_fields = (z == 0) ? n2d + n3d : n3d;
+              for (int i = 0; i < num_fields; i++) {
+                d_nnz[localIndex + i] -= (n3d + n2d);
+              }
             }
           }
-        }
-      } else {
-        // On another processor
-        for (int y = mesh->ystart; y <= mesh->yend; y++) {
-          for (int z = 0; z < mesh->LocalNz; z++) {
-            int localIndex = ROUND(index(mesh->xend, y, z));
-            ASSERT2((localIndex >= 0) && (localIndex < localN));
-            const int num_fields = (z == 0) ? n2d + n3d : n3d;
-            for (int i = 0; i < num_fields; i++) {
-              d_nnz[localIndex + i] -= (n3d + n2d);
-              o_nnz[localIndex + i] += (n3d + n2d);
+        } else {
+          // On another processor
+          for (int y = mesh->ystart; y <= mesh->yend; y++) {
+            for (int z = 0; z < mesh->LocalNz; z++) {
+              int localIndex = ROUND(index(mesh->xend, y, z));
+              ASSERT2((localIndex >= 0) && (localIndex < localN));
+              const int num_fields = (z == 0) ? n2d + n3d : n3d;
+              for (int i = 0; i < num_fields; i++) {
+                d_nnz[localIndex + i] -= (n3d + n2d);
+                o_nnz[localIndex + i] += (n3d + n2d);
+              }
             }
           }
         }
       }
 
       // Y boundaries
-
-      for (int x = mesh->xstart; x <= mesh->xend; x++) {
-        // Default to no boundary
-        // NOTE: This assumes that communications in Y are to other
-        //   processors. If Y is communicated with this processor (e.g. NYPE=1)
-        //   then this will result in PETSc warnings about out of range allocations
-
-        // z = 0 case
-        int localIndex = ROUND(index(x, mesh->ystart, 0));
-        ASSERT2(localIndex >= 0);
-
-        // All 2D and 3D fields
-        for (int i = 0; i < n2d + n3d; i++) {
-          o_nnz[localIndex + i] += (n3d + n2d);
-        }
-
-        for (int z = 1; z < mesh->LocalNz; z++) {
-          localIndex = ROUND(index(x, mesh->ystart, z));
-
-          // Only 3D fields
-          for (int i = 0; i < n3d; i++) {
+      if (ncells_y != 0) {
+        for (int x = mesh->xstart; x <= mesh->xend; x++) {
+          // Default to no boundary
+          // NOTE: This assumes that communications in Y are to other
+          //   processors. If Y is communicated with this processor (e.g. NYPE=1)
+          //   then this will result in PETSc warnings about out of range allocations
+
+          // z = 0 case
+          int localIndex = ROUND(index(x, mesh->ystart, 0));
+          ASSERT2(localIndex >= 0);
+
+          // All 2D and 3D fields
+          for (int i = 0; i < n2d + n3d; i++) {
             o_nnz[localIndex + i] += (n3d + n2d);
+            d_nnz[localIndex + i] -= (n3d + n2d);
           }
-        }
 
-        // z = 0 case
-        localIndex = ROUND(index(x, mesh->yend, 0));
-        // All 2D and 3D fields
-        for (int i = 0; i < n2d + n3d; i++) {
-          o_nnz[localIndex + i] += (n3d + n2d);
-        }
+          for (int z = 1; z < mesh->LocalNz; z++) {
+            localIndex = ROUND(index(x, mesh->ystart, z));
 
-        for (int z = 1; z < mesh->LocalNz; z++) {
-          localIndex = ROUND(index(x, mesh->yend, z));
+            // Only 3D fields
+            for (int i = 0; i < n3d; i++) {
+              o_nnz[localIndex + i] += (n3d + n2d);
+              d_nnz[localIndex + i] -= (n3d + n2d);
+            }
+          }
 
-          // Only 3D fields
-          for (int i = 0; i < n3d; i++) {
+          // z = 0 case
+          localIndex = ROUND(index(x, mesh->yend, 0));
+          // All 2D and 3D fields
+          for (int i = 0; i < n2d + n3d; i++) {
             o_nnz[localIndex + i] += (n3d + n2d);
+            d_nnz[localIndex + i] -= (n3d + n2d);
           }
-        }
-      }
 
-      for (RangeIterator it = mesh->iterateBndryLowerY(); !it.isDone(); it++) {
-        // A boundary, so no communication
+          for (int z = 1; z < mesh->LocalNz; z++) {
+            localIndex = ROUND(index(x, mesh->yend, z));
 
-        // z = 0 case
-        int localIndex = ROUND(index(it.ind, mesh->ystart, 0));
-        if (localIndex < 0) {
-          // This can occur because it.ind includes values in x boundary e.g. x=0
-          continue;
-        }
-        // All 2D and 3D fields
-        for (int i = 0; i < n2d + n3d; i++) {
-          o_nnz[localIndex + i] -= (n3d + n2d);
+            // Only 3D fields
+            for (int i = 0; i < n3d; i++) {
+              o_nnz[localIndex + i] += (n3d + n2d);
+              d_nnz[localIndex + i] -= (n3d + n2d);
+            }
+          }
         }
 
-        for (int z = 1; z < mesh->LocalNz; z++) {
-          int localIndex = ROUND(index(it.ind, mesh->ystart, z));
+        for (RangeIterator it = mesh->iterateBndryLowerY(); !it.isDone(); it++) {
+          // A boundary, so no communication
 
-          // Only 3D fields
-          for (int i = 0; i < n3d; i++) {
+          // z = 0 case
+          int localIndex = ROUND(index(it.ind, mesh->ystart, 0));
+          if (localIndex < 0) {
+            // This can occur because it.ind includes values in x boundary e.g. x=0
+            continue;
+          }
+          // All 2D and 3D fields
+          for (int i = 0; i < n2d + n3d; i++) {
             o_nnz[localIndex + i] -= (n3d + n2d);
           }
-        }
-      }
 
-      for (RangeIterator it = mesh->iterateBndryUpperY(); !it.isDone(); it++) {
-        // A boundary, so no communication
+          for (int z = 1; z < mesh->LocalNz; z++) {
+            int localIndex = ROUND(index(it.ind, mesh->ystart, z));
 
-        // z = 0 case
-        int localIndex = ROUND(index(it.ind, mesh->yend, 0));
-        if (localIndex < 0) {
-          continue; // Out of domain
+            // Only 3D fields
+            for (int i = 0; i < n3d; i++) {
+              o_nnz[localIndex + i] -= (n3d + n2d);
+            }
+          }
         }
 
-        // All 2D and 3D fields
-        for (int i = 0; i < n2d + n3d; i++) {
-          o_nnz[localIndex + i] -= (n3d + n2d);
-        }
+        for (RangeIterator it = mesh->iterateBndryUpperY(); !it.isDone(); it++) {
+          // A boundary, so no communication
 
-        for (int z = 1; z < mesh->LocalNz; z++) {
-          int localIndex = ROUND(index(it.ind, mesh->yend, z));
+          // z = 0 case
+          int localIndex = ROUND(index(it.ind, mesh->yend, 0));
+          if (localIndex < 0) {
+            continue; // Out of domain
+          }
 
-          // Only 3D fields
-          for (int i = 0; i < n3d; i++) {
+          // All 2D and 3D fields
+          for (int i = 0; i < n2d + n3d; i++) {
             o_nnz[localIndex + i] -= (n3d + n2d);
           }
+
+          for (int z = 1; z < mesh->LocalNz; z++) {
+            int localIndex = ROUND(index(it.ind, mesh->yend, z));
+
+            // Only 3D fields
+            for (int i = 0; i < n3d; i++) {
+              o_nnz[localIndex + i] -= (n3d + n2d);
+            }
+          }
         }
       }
 
@@ -395,15 +468,19 @@ int SNESSolver::init() {
 
       // Pre-allocate
       MatMPIAIJSetPreallocation(Jmf, 0, d_nnz.data(), 0, o_nnz.data());
+      MatSeqAIJSetPreallocation(Jmf, 0, d_nnz.data());
       MatSetUp(Jmf);
-      MatSetOption(Jmf, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
+      MatSetOption(Jmf, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE);
 
       // Determine which row/columns of the matrix are locally owned
       int Istart, Iend;
       MatGetOwnershipRange(Jmf, &Istart, &Iend);
 
       // Convert local into global indices
-      index += Istart;
+      // Note: Not in the boundary cells, to keep -1 values
+      for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) {
+        index[i] += Istart;
+      }
 
       // Now communicate to fill guard cells
       mesh->communicate(index);
@@ -413,11 +490,6 @@ int SNESSolver::init() {
 
       output_progress.write("Marking non-zero Jacobian entries\n");
 
-      // Offsets for a 5-point pattern
-      constexpr std::size_t stencil_size = 5;
-      const std::array<int, stencil_size> xoffset = {0, -1, 1, 0, 0};
-      const std::array<int, stencil_size> yoffset = {0, 0, 0, -1, 1};
-
       PetscScalar val = 1.0;
 
       for (int x = mesh->xstart; x <= mesh->xend; x++) {
@@ -430,9 +502,9 @@ int SNESSolver::init() {
             PetscInt row = ind0 + i;
 
             // Loop through each point in the 5-point stencil
-            for (std::size_t c = 0; c < stencil_size; c++) {
-              int xi = x + xoffset[c];
-              int yi = y + yoffset[c];
+            for (const auto& xyoffset : xyoffsets) {
+              int xi = x + xyoffset.first;
+              int yi = y + xyoffset.second;
 
               if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx)
                   || (yi >= mesh->LocalNy)) {
@@ -448,8 +520,8 @@ int SNESSolver::init() {
               // Depends on all variables on this cell
               for (int j = 0; j < n2d; j++) {
                 PetscInt col = ind2 + j;
-
-                MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                ierr = MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                CHKERRQ(ierr);
               }
             }
           }
@@ -468,13 +540,14 @@ int SNESSolver::init() {
               // Depends on 2D fields
               for (int j = 0; j < n2d; j++) {
                 PetscInt col = ind0 + j;
-                MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                ierr = MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                CHKERRQ(ierr);
               }
 
-              // 5 point star pattern
-              for (std::size_t c = 0; c < stencil_size; c++) {
-                int xi = x + xoffset[c];
-                int yi = y + yoffset[c];
+              // Star pattern
+              for (const auto& xyoffset : xyoffsets) {
+                int xi = x + xyoffset.first;
+                int yi = y + xyoffset.second;
 
                 if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx)
                     || (yi >= mesh->LocalNy)) {
@@ -493,7 +566,12 @@ int SNESSolver::init() {
                 // 3D fields on this cell
                 for (int j = 0; j < n3d; j++) {
                   PetscInt col = ind2 + j;
-                  MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                  ierr = MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                  if (ierr != 0) {
+                    output.write("ERROR: {} : ({}, {}) -> ({}, {}) : {} -> {}\n", row, x,
+                                 y, xi, yi, ind2, ind2 + n3d - 1);
+                  }
+                  CHKERRQ(ierr);
                 }
               }
 
@@ -509,7 +587,8 @@ int SNESSolver::init() {
                 }
                 for (int j = 0; j < n3d; j++) {
                   PetscInt col = ind2 + j;
-                  MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                  ierr = MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                  CHKERRQ(ierr);
                 }
 
                 int zm = (z - 1 + nz) % nz;
@@ -519,7 +598,8 @@ int SNESSolver::init() {
                 }
                 for (int j = 0; j < n3d; j++) {
                   PetscInt col = ind2 + j;
-                  MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                  ierr = MatSetValues(Jmf, 1, &row, 1, &col, &val, INSERT_VALUES);
+                  CHKERRQ(ierr);
                 }
               }
             }
@@ -556,7 +636,7 @@ int SNESSolver::init() {
       MatFDColoringSetUp(Jmf, iscoloring, fdcoloring);
       ISColoringDestroy(&iscoloring);
 
-      SNESSetJacobian(snes, Jmf, Jmf, SNESComputeJacobianDefaultColor, fdcoloring);
+      SNESSetJacobian(snes, Jmf, Jmf, SNESComputeJacobianScaledColor, fdcoloring);
     } else {
       // Brute force calculation
 
@@ -564,9 +644,9 @@ int SNESSolver::init() {
           BoutComm::get(), nlocal, nlocal,  // Local sizes
           PETSC_DETERMINE, PETSC_DETERMINE, // Global sizes
           3, // Number of nonzero entries in diagonal portion of local submatrix
-          PETSC_NULL,
+          nullptr,
           0, // Number of nonzeros per row in off-diagonal portion of local submatrix
-          PETSC_NULL, &Jmf);
+          nullptr, &Jmf);
 #if PETSC_VERSION_GE(3, 4, 0)
       SNESSetJacobian(snes, Jmf, Jmf, SNESComputeJacobianDefault, this);
 #else
@@ -633,6 +713,15 @@ int SNESSolver::init() {
     // Set PC type from input
     if (pc_type != "default") {
       PCSetType(pc, pc_type.c_str());
+
+      if (pc_type == "hypre") {
+#if PETSC_HAVE_HYPRE
+        // Set the type of hypre preconditioner
+        PCHYPRESetType(pc, pc_hypre_type.c_str());
+#else
+        throw BoutException("PETSc was not configured with Hypre.");
+#endif
+      }
     }
   }
 
@@ -748,6 +837,12 @@ int SNESSolver::run() {
         // Restore state
         VecCopy(x0, snes_x);
 
+        // Recalculate the Jacobian
+        if (saved_jacobian_lag == 0) {
+          SNESGetLagJacobian(snes, &saved_jacobian_lag);
+          SNESSetLagJacobian(snes, 1);
+        }
+
         // Check lock state
         PetscInt lock_state;
         VecLockGet(snes_x, &lock_state);
@@ -921,6 +1016,12 @@ PetscErrorCode SNESSolver::snes_function(Vec x, Vec f, bool linear) {
   }
   };
 
+  if (scale_rhs) {
+    // f <- f * rhs_scaling_factors
+    ierr = VecPointwiseMult(f, f, rhs_scaling_factors);
+    CHKERRQ(ierr);
+  }
+
   return 0;
 }
 
@@ -967,4 +1068,72 @@ PetscErrorCode SNESSolver::precon(Vec x, Vec f) {
   return 0;
 }
 
+PetscErrorCode SNESSolver::scaleJacobian(Mat B) {
+  if (!scale_rhs) {
+    return 0; // Not scaling the RHS values
+  }
+
+  int ierr;
+
+  // Get index of rows owned by this processor
+  int rstart, rend;
+  MatGetOwnershipRange(B, &rstart, &rend);
+
+  // Check that the vector has the same ownership range
+  int istart, iend;
+  VecGetOwnershipRange(jac_row_inv_norms, &istart, &iend);
+  if ((rstart != istart) or (rend != iend)) {
+    throw BoutException("Ownership ranges different: [{}, {}) and [{}, {})\n", rstart,
+                        rend, istart, iend);
+  }
+
+  // Calculate the norm of each row of the Jacobian
+  PetscScalar* row_inv_norm_data;
+  ierr = VecGetArray(jac_row_inv_norms, &row_inv_norm_data);
+  CHKERRQ(ierr);
+
+  PetscInt ncols;
+  const PetscScalar* vals;
+  for (int row = rstart; row < rend; row++) {
+    MatGetRow(B, row, &ncols, nullptr, &vals);
+
+    // Calculate a norm of this row of the Jacobian
+    PetscScalar norm = 0.0;
+    for (int col = 0; col < ncols; col++) {
+      PetscScalar absval = std::abs(vals[col]);
+      if (absval > norm) {
+        norm = absval;
+      }
+      // Can we identify small elements and remove them?
+      // so we don't need to calculate them next time
+    }
+
+    // Store in the vector as 1 / norm
+    row_inv_norm_data[row - rstart] = 1. / norm;
+
+    MatRestoreRow(B, row, &ncols, nullptr, &vals);
+  }
+
+  ierr = VecRestoreArray(jac_row_inv_norms, &row_inv_norm_data);
+  CHKERRQ(ierr);
+
+  // Modify the RHS scaling: factor = factor / norm
+  ierr = VecPointwiseMult(rhs_scaling_factors, rhs_scaling_factors, jac_row_inv_norms);
+  CHKERRQ(ierr);
+
+  if (diagnose) {
+    // Print maximum and minimum scaling factors
+    PetscReal max_scale, min_scale;
+    VecMax(rhs_scaling_factors, nullptr, &max_scale);
+    VecMin(rhs_scaling_factors, nullptr, &min_scale);
+    output.write("RHS scaling: {} -> {}\n", min_scale, max_scale);
+  }
+
+  // Scale the Jacobian rows by multiplying on the left by 1/norm
+  ierr = MatDiagonalScale(B, jac_row_inv_norms, nullptr);
+  CHKERRQ(ierr);
+
+  return 0;
+}
+
 #endif // BOUT_HAS_PETSC
diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx
index c00f4b2581..871ccad53e 100644
--- a/src/solver/impls/snes/snes.hxx
+++ b/src/solver/impls/snes/snes.hxx
@@ -82,6 +82,12 @@ public:
   /// @param[out] f  The result of the operation
   PetscErrorCode precon(Vec x, Vec f);
 
+  /// Scale an approximate Jacobian,
+  /// and update the internal RHS scaling factors
+  /// This is called by SNESComputeJacobianScaledColor with the
+  /// finite difference approximated Jacobian.
+  PetscErrorCode scaleJacobian(Mat B);
+
 private:
   BoutReal timestep;     ///< Internal timestep
   BoutReal dt;           ///< Current timestep used in snes_function
@@ -125,10 +131,15 @@ private:
   bool kspsetinitialguessnonzero; ///< Set initial guess to non-zero
   int maxl;                       ///< Maximum linear iterations
   std::string pc_type;            ///< Preconditioner type
+  std::string pc_hypre_type;      ///< Hypre preconditioner type
   std::string line_search_type;   ///< Line search type
   bool matrix_free;               ///< Use matrix free Jacobian
   int lag_jacobian;               ///< Re-use Jacobian
   bool use_coloring;              ///< Use matrix coloring
+
+  bool scale_rhs;          ///< Scale time derivatives?
+  Vec rhs_scaling_factors; ///< Factors to multiply RHS function
+  Vec jac_row_inv_norms;   ///< 1 / Norm of the rows of the Jacobian
 };
 
 #else
diff --git a/src/sys/boutcomm.cxx b/src/sys/boutcomm.cxx
index ecaeb866bb..2d78a24076 100644
--- a/src/sys/boutcomm.cxx
+++ b/src/sys/boutcomm.cxx
@@ -25,7 +25,7 @@ void BoutComm::setComm(MPI_Comm c) {
   hasBeenSet = true;
 }
 
-MPI_Comm BoutComm::getComm() {
+MPI_Comm& BoutComm::getComm() {
   if (comm == MPI_COMM_NULL) {
     // No communicator set. Initialise MPI
     MPI_Init(pargc, pargv);
@@ -39,7 +39,7 @@ MPI_Comm BoutComm::getComm() {
 bool BoutComm::isSet() { return hasBeenSet; }
 
 // Static functions below. Must use getInstance()
-MPI_Comm BoutComm::get() { return getInstance()->getComm(); }
+MPI_Comm& BoutComm::get() { return getInstance()->getComm(); }
 
 void BoutComm::setArgs(int& c, char**& v) {
   getInstance()->pargc = &c;
diff --git a/src/sys/output.cxx b/src/sys/output.cxx
index 0ab1d2e0c8..c07cd53474 100644
--- a/src/sys/output.cxx
+++ b/src/sys/output.cxx
@@ -109,5 +109,3 @@ ConditionalOutput output_progress(Output::getInstance());
 ConditionalOutput output_error(Output::getInstance());
 ConditionalOutput output_verbose(Output::getInstance(), false);
 ConditionalOutput output(Output::getInstance());
-
-#undef bout_vsnprint_pre
diff --git a/src/sys/petsclib.cxx b/src/sys/petsclib.cxx
index 91db0c44ea..f1ced63a21 100644
--- a/src/sys/petsclib.cxx
+++ b/src/sys/petsclib.cxx
@@ -23,16 +23,18 @@ PetscLib::PetscLib(Options* opt) {
     if (count == 0) {
       // Initialise PETSc
 
+      // Load global PETSc options from the [petsc] section of the input
+      // Note: This should be before PetscInitialize so that some options
+      //       can modify initialization e.g. -log_view.
+      setPetscOptions(Options::root()["petsc"], "");
+
       output << "Initialising PETSc\n";
       PETSC_COMM_WORLD = BoutComm::getInstance()->getComm();
-      PetscInitialize(pargc, pargv, PETSC_NULL, help);
+      PetscInitialize(pargc, pargv, nullptr, help);
       PetscPopSignalHandler();
 
       PetscLogEventRegister("Total BOUT++", 0, &USER_EVENT);
       PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0);
-
-      // Load global PETSc options from the [petsc] section of the input
-      setPetscOptions(Options::root()["petsc"], "");
     }
 
     if (opt != nullptr and opt->isSection()) {
diff --git a/src/sys/slepclib.cxx b/src/sys/slepclib.cxx
index 1dc89bc047..79fdfa9bbc 100644
--- a/src/sys/slepclib.cxx
+++ b/src/sys/slepclib.cxx
@@ -18,7 +18,7 @@ SlepcLib::SlepcLib() {
     // Initialise SLEPc
 
     output << "Initialising SLEPc\n";
-    SlepcInitialize(pargc, pargv, PETSC_NULL, help);
+    SlepcInitialize(pargc, pargv, nullptr, help);
     PetscLogEventRegister("Total BOUT++", 0, &USER_EVENT);
     PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0);
   }
diff --git a/tests/integrated/test-bout-override-default-option/test-bout-override-default-option.cxx b/tests/integrated/test-bout-override-default-option/test-bout-override-default-option.cxx
index 24cb6bc0b2..4c50579c47 100644
--- a/tests/integrated/test-bout-override-default-option/test-bout-override-default-option.cxx
+++ b/tests/integrated/test-bout-override-default-option/test-bout-override-default-option.cxx
@@ -1,4 +1,4 @@
-#include <options.hxx>
+#include <bout/options.hxx>
 
 // Use an integrated test for what is effectively a unit test of the
 // BOUT_OVERRIDE_DEFAULT_OPTION() macro because the functionality relies on the state of
diff --git a/tests/integrated/test-laplace-hypre3d/test-laplace3d.cxx b/tests/integrated/test-laplace-hypre3d/test-laplace3d.cxx
index 69524d44da..64b5dec1d4 100644
--- a/tests/integrated/test-laplace-hypre3d/test-laplace3d.cxx
+++ b/tests/integrated/test-laplace-hypre3d/test-laplace3d.cxx
@@ -125,8 +125,13 @@ int main(int argc, char** argv) {
   Field3D error = rhs_check - rhs;
   BoutReal error_max = max(abs(error), true);
 
-  SAVE_ONCE(f, rhs, rhs_check, error, error_max);
-  bout::globals::dump.write();
+  Options dump;
+  dump["f"] = f;
+  dump["rhs"] = rhs;
+  dump["rhs_check"] = rhs_check;
+  dump["error"] = error;
+  dump["error_max"] = error_max;
+  bout::writeDefaultOutputFile(dump);
 
   laplace_solver.reset(nullptr);
   BoutFinalise();
diff --git a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp
index ae7cfaa5b4..6474b2604b 100644
--- a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp
+++ b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp
@@ -76,6 +76,7 @@ atol = 1e-13
 
 [laplace:petsc]
 mg_levels_ksp_max_it = 3
+mg_levels_pc_type = sor
 
 [input]
 transform_from_field_aligned = false
diff --git a/tests/integrated/test-laplacexy/test-laplacexy.cxx b/tests/integrated/test-laplacexy/test-laplacexy.cxx
index 62449a76ee..3142d359b1 100644
--- a/tests/integrated/test-laplacexy/test-laplacexy.cxx
+++ b/tests/integrated/test-laplacexy/test-laplacexy.cxx
@@ -30,27 +30,20 @@
 #include <bout/invert/laplacexy.hxx>
 #include <bout/options.hxx>
 
-using bout::globals::dump;
 using bout::globals::mesh;
 
 int main(int argc, char** argv) {
 
   BoutInitialise(argc, argv);
 
-  auto coords = mesh->getCoordinates();
-
-  auto& opt = Options::root();
-
-  LaplaceXY laplacexy;
-
-  bool include_y_derivs = opt["laplacexy"]["include_y_derivs"];
+  auto* coords = mesh->getCoordinates();
 
   // Solving equations of the form
   // Div(A Grad_perp(f)) + B*f = rhs
   // A*Laplace_perp(f) + Grad_perp(A).Grad_perp(f) + B*f = rhs
-  Field2D f, a, b, sol;
-  Field2D error, absolute_error; //Absolute value of relative error: abs((f - sol)/f)
-  BoutReal max_error;            //Output of test
+  Field2D f;
+  Field2D a;
+  Field2D b;
 
   initial_profile("f", f);
   initial_profile("a", a);
@@ -63,42 +56,45 @@ int main(int argc, char** argv) {
 
   ////////////////////////////////////////////////////////////////////////////////////////
 
-  Field2D rhs, rhs_check;
+  Field2D rhs;
+  const bool include_y_derivs = Options::root()["laplacexy"]["include_y_derivs"];
   if (include_y_derivs) {
     rhs = a * Laplace_perp(f) + Grad_perp(a) * Grad_perp(f) + b * f;
   } else {
     rhs = a * Delp2(f, CELL_DEFAULT, false) + coords->g11 * DDX(a) * DDX(f) + b * f;
   }
 
+  LaplaceXY laplacexy;
   laplacexy.setCoefs(a, b);
 
-  sol = laplacexy.solve(rhs, 0.);
-  error = (f - sol) / f;
-  absolute_error = f - sol;
-  max_error = max(abs(absolute_error), true);
+  Field2D solution = laplacexy.solve(rhs, 0.);
+  Field2D relative_error = (f - solution) / f;
+  Field2D absolute_error = f - solution;
+  BoutReal max_error = max(abs(absolute_error), true);
 
-  output << "Magnitude of maximum absolute error is " << max_error << endl;
+  output.write("Magnitude of maximum absolute error is {}\n", max_error);
 
-  mesh->communicate(sol);
+  mesh->communicate(solution);
+  Field2D rhs_check;
   if (include_y_derivs) {
-    rhs_check = a * Laplace_perp(sol) + Grad_perp(a) * Grad_perp(sol) + b * sol;
-  } else {
     rhs_check =
-        a * Delp2(sol, CELL_DEFAULT, false) + coords->g11 * DDX(a) * DDX(sol) + b * sol;
+        a * Laplace_perp(solution) + Grad_perp(a) * Grad_perp(solution) + b * solution;
+  } else {
+    rhs_check = a * Delp2(solution, CELL_DEFAULT, false)
+                + coords->g11 * DDX(a) * DDX(solution) + b * solution;
   }
 
-  dump.add(a, "a");
-  dump.add(b, "b");
-  dump.add(f, "f");
-  dump.add(sol, "sol");
-  dump.add(error, "error");
-  dump.add(absolute_error, "absolute_error");
-  dump.add(max_error, "max_error");
-  dump.add(rhs, "rhs");
-  dump.add(rhs_check, "rhs_check");
-
-  dump.write();
-  dump.close();
+  Options dump;
+  dump["a"] = a;
+  dump["b"] = b;
+  dump["f"] = f;
+  dump["sol"] = solution;
+  dump["relative_error"] = relative_error;
+  dump["absolute_error"] = absolute_error;
+  dump["max_error"] = max_error;
+  dump["rhs"] = rhs;
+  dump["rhs_check"] = rhs_check;
+  bout::writeDefaultOutputFile(dump);
 
   MPI_Barrier(BoutComm::get()); // Wait for all processors to write data
 
diff --git a/tests/integrated/test-laplacexy2-hypre/test-laplacexy.cxx b/tests/integrated/test-laplacexy2-hypre/test-laplacexy.cxx
index 77fbdb197c..0ae2cbf312 100644
--- a/tests/integrated/test-laplacexy2-hypre/test-laplacexy.cxx
+++ b/tests/integrated/test-laplacexy2-hypre/test-laplacexy.cxx
@@ -59,8 +59,8 @@ int main(int argc, char** argv) {
   Field2D guess = 0.0;
   Field2D sol = laplacexy.solve(rhs, guess);
   Field2D error = (f - sol) / f;
-  Field2D absolute_error =
-      abs(f - sol); // Absolute value of relative error: abs((f - sol)/f)
+  // Absolute value of relative error: abs((f - sol)/f)
+  Field2D absolute_error = abs(f - sol);
   BoutReal max_error = max(absolute_error, true);
 
   output << "Magnitude of maximum absolute error is " << max_error << endl;
@@ -68,20 +68,17 @@ int main(int argc, char** argv) {
   sol.getMesh()->communicate(sol);
   Field2D rhs_check = Laplace_perpXY(a, sol);
 
-  using bout::globals::dump;
-
-  dump.add(a, "a");
-  dump.add(b, "b");
-  dump.add(f, "f");
-  dump.add(sol, "sol");
-  dump.add(error, "error");
-  dump.add(absolute_error, "absolute_error");
-  dump.add(max_error, "max_error");
-  dump.add(rhs, "rhs");
-  dump.add(rhs_check, "rhs_check");
-
-  dump.write();
-  dump.close();
+  Options dump;
+  dump["a"] = a;
+  dump["b"] = b;
+  dump["f"] = f;
+  dump["sol"] = sol;
+  dump["error"] = error;
+  dump["absolute_error"] = absolute_error;
+  dump["max_error"] = max_error;
+  dump["rhs"] = rhs;
+  dump["rhs_check"] = rhs_check;
+  bout::writeDefaultOutputFile(dump);
 
   MPI_Barrier(BoutComm::get()); // Wait for all processors to write data
 
diff --git a/tests/integrated/test-squash/runtest b/tests/integrated/test-squash/runtest
index 09bf2f1d14..692d561c59 100755
--- a/tests/integrated/test-squash/runtest
+++ b/tests/integrated/test-squash/runtest
@@ -7,6 +7,7 @@ import numpy as np
 from boututils.run_wrapper import launch_safe, shell_safe, build_and_log
 import argparse
 import re
+import os.path
 
 
 # requires: all_tests
@@ -81,7 +82,7 @@ def verify(f1, f2):
 
 parser = argparse.ArgumentParser(description="Test the bout-squashoutput wrapper")
 parser.add_argument(
-    "executable", help="Path to bout-squashoutput", default="../../../bin"
+    "executable", help="Path to bout-squashoutput", default="../../../bin", nargs="?"
 )
 args = parser.parse_args()
 
@@ -89,6 +90,9 @@ build_and_log("Squash test")
 
 bout_squashoutput = args.executable + "/bout-squashoutput"
 
+if not os.path.exists(bout_squashoutput):
+    bout_squashoutput = "bout-squashoutput"
+
 print("Run once to get normal data")
 timed_shell_safe("./squash -q -q -q nout=2")
 timed_shell_safe("mv data/BOUT.dmp.0.nc f1.nc")
diff --git a/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx b/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx
index 832ccc8e6b..b2831808a4 100644
--- a/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx
+++ b/tests/unit/invert/laplace/test_laplace_petsc3damg.cxx
@@ -99,7 +99,7 @@ class Petsc3dAmgTest
 public:
   WithQuietOutput info{output_info}, warn{output_warn}, progress{output_progress},
       all{output};
-  Petsc3dAmgTest() : FakeMeshFixture(), solver(getOptions(GetParam())) {
+  Petsc3dAmgTest() : solver(&getOptions(GetParam())) {
     PetscErrorPrintf = PetscErrorPrintfNone;
     int nx = mesh->GlobalNx, ny = mesh->GlobalNy, nz = mesh->GlobalNz;
     static_cast<FakeMesh*>(bout::globals::mesh)
@@ -139,20 +139,23 @@ class Petsc3dAmgTest
   ForwardOperator forward;
 
 private:
-  static Options* getOptions(std::tuple<bool, bool, bool, bool> param) {
-    Options* options = Options::getRoot()->getSection("laplace");
-    (*options)["type"] = "petsc3damg";
-    (*options)["inner_boundary_flags"] =
+  static Options& getOptions(std::tuple<bool, bool, bool, bool> param) {
+    auto& options = Options::root()["laplace"];
+    options["type"] = "petsc3damg";
+    options["inner_boundary_flags"] =
         (std::get<0>(param) ? INVERT_AC_GRAD : 0) + INVERT_RHS;
-    (*options)["outer_boundary_flags"] =
+    options["outer_boundary_flags"] =
         (std::get<1>(param) ? INVERT_AC_GRAD : 0) + INVERT_RHS;
-    (*options)["lower_boundary_flags"] =
+    options["lower_boundary_flags"] =
         (std::get<2>(param) ? INVERT_AC_GRAD : 0) + INVERT_RHS;
-    (*options)["upper_boundary_flags"] =
+    options["upper_boundary_flags"] =
         (std::get<3>(param) ? INVERT_AC_GRAD : 0) + INVERT_RHS;
-    (*options)["fourth_order"] = false;
-    (*options)["atol"] = tol / 30; // Need to specify smaller than desired tolerance to
-    (*options)["rtol"] = tol / 30; // ensure it is satisfied for every element.
+    options["fourth_order"] = false;
+    options["atol"] = tol / 30; // Need to specify smaller than desired tolerance to
+    options["rtol"] = tol / 30; // ensure it is satisfied for every element.
+    auto& petsc_options = options["petsc"];
+    petsc_options["mg_levels_ksp_max_it"] = 4;
+    petsc_options["mg_levels_pc_type"] = "sor";
     return options;
   }
 };
diff --git a/tools/pylib/_boutpp_build/backend.py b/tools/pylib/_boutpp_build/backend.py
index 65a15c913c..b5ad2c6e95 100644
--- a/tools/pylib/_boutpp_build/backend.py
+++ b/tools/pylib/_boutpp_build/backend.py
@@ -31,13 +31,20 @@ def getversion():
         _bout_next_version = "5.1.0"
 
         try:
-            tmp = run2(f"git describe --tags --match={_bout_previous_version}").strip()
-            tmp = re.sub(f"{_bout_previous_version}-", f"{_bout_next_version}.dev", tmp)
-            if useLocalVersion:
-                tmp = re.sub("-", "+", tmp)
-            else:
-                tmp = re.sub("-.*", "", tmp)
-            version = tmp
+            try:
+                version = run2("git describe --exact-match --tags HEAD").strip()
+            except subprocess.CalledProcessError:
+                tmp = run2(
+                    f"git describe --tags --match={_bout_previous_version}"
+                ).strip()
+                tmp = re.sub(
+                    f"{_bout_previous_version}-", f"{_bout_next_version}.dev", tmp
+                )
+                if useLocalVersion:
+                    tmp = re.sub("-", "+", tmp)
+                else:
+                    tmp = re.sub("-.*", "", tmp)
+                version = tmp
             with open("_version.txt", "w") as f:
                 f.write(version + "\n")
         except subprocess.CalledProcessError:
diff --git a/tools/pylib/_boutpp_build/boutpp.pyx.jinja b/tools/pylib/_boutpp_build/boutpp.pyx.jinja
index 0d8cef173a..657e2f28c1 100644
--- a/tools/pylib/_boutpp_build/boutpp.pyx.jinja
+++ b/tools/pylib/_boutpp_build/boutpp.pyx.jinja
@@ -1500,7 +1500,7 @@ def interp_to(Field3D f3d,location):
 def setOption(name, value, source="PyInterface", force=False):
     """
     Set an option in the global Options tree. Prefer
-    `Options.set` to avoid unexpected results if several Option
+    ``Options.set`` to avoid unexpected results if several Option
     roots are avalaible.
 
     Parameters