diff --git a/manual/sphinx/user_docs/time_integration.rst b/manual/sphinx/user_docs/time_integration.rst index 7d0b56abcc..a4db823369 100644 --- a/manual/sphinx/user_docs/time_integration.rst +++ b/manual/sphinx/user_docs/time_integration.rst @@ -393,7 +393,9 @@ iterations within a given range. +---------------------------+---------------+----------------------------------------------------+ | predictor | true | Use linear predictor? | +---------------------------+---------------+----------------------------------------------------+ -| matrix_free | false | Use matrix free Jacobian-vector product? | +| matrix_free | false | Matrix-free preconditioning? | ++---------------------------+---------------+----------------------------------------------------+ +| matrix_free_operator | false | Use matrix free Jacobian-vector product? | +---------------------------+---------------+----------------------------------------------------+ | use_coloring | true | If ``matrix_free=false``, use coloring to speed up | | | | calculation of the Jacobian elements. | @@ -402,11 +404,18 @@ iterations within a given range. +---------------------------+---------------+----------------------------------------------------+ | kspsetinitialguessnonzero | false | If true, Use previous solution as KSP initial | +---------------------------+---------------+----------------------------------------------------+ -| use_precon | false | Use user-supplied preconditioner? | +| use_precon | false | If ``matrix_free=true``, use user-supplied | +| | | preconditioner? | | | | If false, the default PETSc preconditioner is used | +---------------------------+---------------+----------------------------------------------------+ | diagnose | false | Print diagnostic information every iteration | +---------------------------+---------------+----------------------------------------------------+ +| stencil:cross | 0 | If ``matrix_free=false`` and ``use_coloring=true`` | +| stencil:square | 0 | Set the size and shape of the Jacobian coloring | +| stencil:taxi | 2 | stencil. | ++---------------------------+---------------+----------------------------------------------------+ +| force_symmetric_coloring | false | Ensure that the Jacobian coloring is symmetric | ++---------------------------+---------------+----------------------------------------------------+ The predictor is linear extrapolation from the last two timesteps. It seems to be effective, but can be disabled by setting ``predictor = false``. @@ -444,6 +453,51 @@ Preconditioner types: Enable with command-line args ``-pc_type hypre -pc_hypre_type euclid -pc_hypre_euclid_levels k`` where ``k`` is the level (1-8 typically). +Jacobian coloring stencil +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The stencil used to create the Jacobian colouring can be varied, +depending on which numerical operators are in use. + + +``solver:stencil:cross = N`` +e.g. for N == 2 + +.. code-block:: bash + + * + * + * * x * * + * + * + + +``solver:stencil:square = N`` +e.g. for N == 2 + +.. code-block:: bash + + * * * * * + * * * * * + * * x * * + * * * * * + * * * * * + +``solver:stencil:taxi = N`` +e.g. for N == 2 + +.. code-block:: bash + + * + * * * + * * x * * + * * * + * + +Setting ``solver:force_symmetric_coloring = true``, will make sure +that the jacobian colouring matrix is symmetric. This will often +include a few extra non-zeros that the stencil will miss otherwise + ODE integration --------------- diff --git a/src/solver/impls/snes/snes.cxx b/src/solver/impls/snes/snes.cxx index e382bbd3f8..4dce1e7086 100644 --- a/src/solver/impls/snes/snes.cxx +++ b/src/solver/impls/snes/snes.cxx @@ -1,4 +1,4 @@ -#include "bout/build_defines.hxx" +#include "bout/build_config.hxx" #if BOUT_HAS_PETSC @@ -6,18 +6,56 @@ #include #include +#include #include #include -#include #include #include #include -#include "petscmat.h" #include "petscsnes.h" +class ColoringStencil { +private: + bool static isInSquare(int const i, int const j, int const n_square) { + return std::abs(i) <= n_square && std::abs(j) <= n_square; + } + bool static isInCross(int const i, int const j, int const n_cross) { + if (i == 0) { + return std::abs(j) <= n_cross; + } + if (j == 0) { + return std::abs(i) <= n_cross; + } + return false; + } + bool static isInTaxi(int const i, int const j, int const n_taxi) { + return std::abs(i) + std::abs(j) <= n_taxi; + } + +public: + auto static getOffsets(int n_square, int n_taxi, int n_cross) { + ASSERT2(n_square >= 0 && n_cross >= 0 && n_taxi >= 0 + && n_square + n_cross + n_taxi > 0); + auto inside = [&](int i, int j) { + return isInSquare(i, j, n_square) || isInTaxi(i, j, n_taxi) + || isInCross(i, j, n_cross); + }; + std::vector> xy_offsets; + auto loop_bound = std::max({n_square, n_taxi, n_cross}); + for (int i = -loop_bound; i <= loop_bound; ++i) { + for (int j = -loop_bound; j <= loop_bound; ++j) { + if (inside(i, j)) { + xy_offsets.emplace_back(i, j); + } + } + } + return xy_offsets; + } +}; + /* * PETSc callback function, which evaluates the nonlinear * function to be solved by SNES. @@ -122,7 +160,7 @@ SNESSolver::SNESSolver(Options* opts) .withDefault(false)), matrix_free_operator((*options)["matrix_free_operator"] .doc("Use matrix free Jacobian-vector operator?") - .withDefault(true)), + .withDefault(false)), lag_jacobian((*options)["lag_jacobian"] .doc("Re-use the Jacobian this number of SNES iterations") .withDefault(50)), @@ -279,225 +317,164 @@ int SNESSolver::init() { output_progress.write("Setting Jacobian matrix sizes\n"); - int n2d = f2d.size(); - int n3d = f3d.size(); + const int n2d = f2d.size(); + const int n3d = f3d.size(); // Set size of Matrix on each processor to nlocal x nlocal MatCreate(BoutComm::get(), &Jfd); MatSetSizes(Jfd, nlocal, nlocal, PETSC_DETERMINE, PETSC_DETERMINE); MatSetFromOptions(Jfd); - - std::vector d_nnz(nlocal); - std::vector o_nnz(nlocal); - - // Set values for most points - const int ncells_x = (mesh->LocalNx > 1) ? 2 : 0; - const int ncells_y = (mesh->LocalNy > 1) ? 2 : 0; - const int ncells_z = (mesh->LocalNz > 1) ? 2 : 0; - - const auto star_pattern = (1 + ncells_x + ncells_y) * (n3d + n2d) + ncells_z * n3d; - - // Offsets. Start with the central cell - std::vector> xyoffsets{{0, 0}}; - if (ncells_x != 0) { - // Stencil includes points in X - xyoffsets.push_back({-1, 0}); - xyoffsets.push_back({1, 0}); - } - if (ncells_y != 0) { - // Stencil includes points in Y - xyoffsets.push_back({0, -1}); - xyoffsets.push_back({0, 1}); + // Determine which row/columns of the matrix are locally owned + int Istart, Iend; + MatGetOwnershipRange(Jfd, &Istart, &Iend); + // Convert local into global indices + // Note: Not in the boundary cells, to keep -1 values + for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) { + index[i] += Istart; } + // Now communicate to fill guard cells + mesh->communicate(index); - output_info.write("Star pattern: {} non-zero entries\n", star_pattern); - for (int i = 0; i < nlocal; i++) { - // Non-zero elements on this processor - d_nnz[i] = star_pattern; - // Non-zero elements on neighboring processor - o_nnz[i] = 0; + // Non-zero elements on this processor + std::vector d_nnz; + std::vector o_nnz; + auto n_square = (*options)["stencil:square"] + .doc("Extent of stencil (square)") + .withDefault(0); + auto n_taxi = (*options)["stencil:taxi"] + .doc("Extent of stencil (taxi-cab norm)") + .withDefault(0); + auto n_cross = (*options)["stencil:cross"] + .doc("Extent of stencil (cross)") + .withDefault(0); + //Set n_taxi 2 if nothing else is set + //Probably a better way to do this + if (n_square == 0 && n_taxi == 0 && n_cross == 0) { + output_info.write("Setting solver:stencil:taxi = 2\n"); + n_taxi = 2; } - // X boundaries - if (ncells_x != 0) { - if (mesh->firstX()) { - // Lower X boundary - for (int y = mesh->ystart; y <= mesh->yend; y++) { - for (int z = 0; z < mesh->LocalNz; z++) { - const int localIndex = ROUND(index(mesh->xstart, y, z)); - ASSERT2((localIndex >= 0) && (localIndex < nlocal)); - const int num_fields = (z == 0) ? n2d + n3d : n3d; - for (int i = 0; i < num_fields; i++) { - d_nnz[localIndex + i] -= (n3d + n2d); - } - } - } - } else { - // On another processor - for (int y = mesh->ystart; y <= mesh->yend; y++) { - for (int z = 0; z < mesh->LocalNz; z++) { - const int localIndex = ROUND(index(mesh->xstart, y, z)); - ASSERT2((localIndex >= 0) && (localIndex < nlocal)); - const int num_fields = (z == 0) ? n2d + n3d : n3d; - for (int i = 0; i < num_fields; i++) { - d_nnz[localIndex + i] -= (n3d + n2d); - o_nnz[localIndex + i] += (n3d + n2d); - } - } - } - } - if (mesh->lastX()) { - // Upper X boundary - for (int y = mesh->ystart; y <= mesh->yend; y++) { - for (int z = 0; z < mesh->LocalNz; z++) { - const int localIndex = ROUND(index(mesh->xend, y, z)); - ASSERT2((localIndex >= 0) && (localIndex < nlocal)); - const int num_fields = (z == 0) ? n2d + n3d : n3d; - for (int i = 0; i < num_fields; i++) { - d_nnz[localIndex + i] -= (n3d + n2d); - } - } - } - } else { - // On another processor + auto const xy_offsets = ColoringStencil::getOffsets(n_square, n_taxi, n_cross); + { + // This is ugly but can't think of a better and robust way to + // count the non-zeros for some arbitrary stencil + // effectively the same loop as the one that sets the non-zeros below + std::vector> d_nnz_map2d(nlocal); + std::vector> o_nnz_map2d(nlocal); + std::vector> d_nnz_map3d(nlocal); + std::vector> o_nnz_map3d(nlocal); + // Loop over every element in 2D to count the *unique* non-zeros + for (int x = mesh->xstart; x <= mesh->xend; x++) { for (int y = mesh->ystart; y <= mesh->yend; y++) { - for (int z = 0; z < mesh->LocalNz; z++) { - const int localIndex = ROUND(index(mesh->xend, y, z)); - ASSERT2((localIndex >= 0) && (localIndex < nlocal)); - const int num_fields = (z == 0) ? n2d + n3d : n3d; - for (int i = 0; i < num_fields; i++) { - d_nnz[localIndex + i] -= (n3d + n2d); - o_nnz[localIndex + i] += (n3d + n2d); - } - } - } - } - } - // Y boundaries - if (ncells_y != 0) { - for (int x = mesh->xstart; x <= mesh->xend; x++) { - // Default to no boundary - // NOTE: This assumes that communications in Y are to other - // processors. If Y is communicated with this processor (e.g. NYPE=1) - // then this will result in PETSc warnings about out of range allocations - - // z = 0 case - int localIndex = ROUND(index(x, mesh->ystart, 0)); - ASSERT2(localIndex >= 0); - - // All 2D and 3D fields - for (int i = 0; i < n2d + n3d; i++) { - o_nnz[localIndex + i] += (n3d + n2d); - d_nnz[localIndex + i] -= (n3d + n2d); - } + const int ind0 = ROUND(index(x, y, 0)) - Istart; - for (int z = 1; z < mesh->LocalNz; z++) { - localIndex = ROUND(index(x, mesh->ystart, z)); + // 2D fields + for (int i = 0; i < n2d; i++) { + const PetscInt row = ind0 + i; + // Loop through each point in the stencil + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) + || (yi >= mesh->LocalNy)) { + continue; + } - // Only 3D fields - for (int i = 0; i < n3d; i++) { - o_nnz[localIndex + i] += (n3d + n2d); - d_nnz[localIndex + i] -= (n3d + n2d); + const int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // A boundary point + } + + // Depends on all variables on this cell + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind2 + j; + if (col >= Istart && col < Iend) { + d_nnz_map2d[row].insert(col); + } else { + o_nnz_map2d[row].insert(col); + } + } + } } - } + // 3D fields + for (int z = 0; z < mesh->LocalNz; z++) { + const int ind = ROUND(index(x, y, z)) - Istart; - // z = 0 case - localIndex = ROUND(index(x, mesh->yend, 0)); - // All 2D and 3D fields - for (int i = 0; i < n2d + n3d; i++) { - o_nnz[localIndex + i] += (n3d + n2d); - d_nnz[localIndex + i] -= (n3d + n2d); - } + for (int i = 0; i < n3d; i++) { + PetscInt row = ind + i; + if (z == 0) { + row += n2d; + } - for (int z = 1; z < mesh->LocalNz; z++) { - localIndex = ROUND(index(x, mesh->yend, z)); + // Depends on 2D fields + for (int j = 0; j < n2d; j++) { + const PetscInt col = ind0 + j; + if (col >= Istart && col < Iend) { + d_nnz_map2d[row].insert(col); + } else { + o_nnz_map2d[row].insert(col); + } + } - // Only 3D fields - for (int i = 0; i < n3d; i++) { - o_nnz[localIndex + i] += (n3d + n2d); - d_nnz[localIndex + i] -= (n3d + n2d); - } - } - } + // Star pattern + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; - for (RangeIterator it = mesh->iterateBndryLowerY(); !it.isDone(); it++) { - // A boundary, so no communication + if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) + || (yi >= mesh->LocalNy)) { + continue; + } - // z = 0 case - int localIndex = ROUND(index(it.ind, mesh->ystart, 0)); - if (localIndex < 0) { - // This can occur because it.ind includes values in x boundary e.g. x=0 - continue; - } - // All 2D and 3D fields - for (int i = 0; i < n2d + n3d; i++) { - o_nnz[localIndex + i] -= (n3d + n2d); - } + int ind2 = ROUND(index(xi, yi, 0)); + if (ind2 < 0) { + continue; // Boundary point + } - for (int z = 1; z < mesh->LocalNz; z++) { - int localIndex = ROUND(index(it.ind, mesh->ystart, z)); + if (z == 0) { + ind2 += n2d; + } - // Only 3D fields - for (int i = 0; i < n3d; i++) { - o_nnz[localIndex + i] -= (n3d + n2d); + // 3D fields on this cell + for (int j = 0; j < n3d; j++) { + const PetscInt col = ind2 + j; + if (col >= Istart && col < Iend) { + d_nnz_map3d[row].insert(col); + } else { + o_nnz_map3d[row].insert(col); + } + } + } + } } } } - for (RangeIterator it = mesh->iterateBndryUpperY(); !it.isDone(); it++) { - // A boundary, so no communication + d_nnz.reserve(nlocal); + d_nnz.reserve(nlocal); - // z = 0 case - const int localIndex = ROUND(index(it.ind, mesh->yend, 0)); - if (localIndex < 0) { - continue; // Out of domain - } - - // All 2D and 3D fields - for (int i = 0; i < n2d + n3d; i++) { - o_nnz[localIndex + i] -= (n3d + n2d); - } - - for (int z = 1; z < mesh->LocalNz; z++) { - const int localIndex = ROUND(index(it.ind, mesh->yend, z)); - - // Only 3D fields - for (int i = 0; i < n3d; i++) { - o_nnz[localIndex + i] -= (n3d + n2d); - } - } + for (int i = 0; i < nlocal; ++i) { + //Assume all elements in the z direction are potentially coupled + d_nnz.emplace_back(d_nnz_map3d[i].size() * mesh->LocalNz + + d_nnz_map2d[i].size()); + o_nnz.emplace_back(o_nnz_map3d[i].size() * mesh->LocalNz + + o_nnz_map2d[i].size()); } } output_progress.write("Pre-allocating Jacobian\n"); - // Pre-allocate MatMPIAIJSetPreallocation(Jfd, 0, d_nnz.data(), 0, o_nnz.data()); MatSeqAIJSetPreallocation(Jfd, 0, d_nnz.data()); MatSetUp(Jfd); MatSetOption(Jfd, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE); - // Determine which row/columns of the matrix are locally owned - int Istart, Iend; - MatGetOwnershipRange(Jfd, &Istart, &Iend); - - // Convert local into global indices - // Note: Not in the boundary cells, to keep -1 values - for (const auto& i : mesh->getRegion3D("RGN_NOBNDRY")) { - index[i] += Istart; - } - - // Now communicate to fill guard cells - mesh->communicate(index); - ////////////////////////////////////////////////// // Mark non-zero entries output_progress.write("Marking non-zero Jacobian entries\n"); - - const PetscScalar val = 1.0; - + PetscScalar val = 1.0; for (int x = mesh->xstart; x <= mesh->xend; x++) { for (int y = mesh->ystart; y <= mesh->yend; y++) { @@ -507,35 +484,31 @@ int SNESSolver::init() { for (int i = 0; i < n2d; i++) { const PetscInt row = ind0 + i; - // Loop through each point in the 5-point stencil - for (const auto& xyoffset : xyoffsets) { - const int xi = x + xyoffset.first; - const int yi = y + xyoffset.second; - + // Loop through each point in the stencil + for (const auto& [x_off, y_off] : xy_offsets) { + const int xi = x + x_off; + const int yi = y + y_off; if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) || (yi >= mesh->LocalNy)) { continue; } - const int ind2 = ROUND(index(xi, yi, 0)); - + int ind2 = ROUND(index(xi, yi, 0)); if (ind2 < 0) { continue; // A boundary point } // Depends on all variables on this cell for (int j = 0; j < n2d; j++) { - const PetscInt col = ind2 + j; + PetscInt col = ind2 + j; ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); CHKERRQ(ierr); } } } - // 3D fields for (int z = 0; z < mesh->LocalNz; z++) { - - const int ind = ROUND(index(x, y, z)); + int ind = ROUND(index(x, y, z)); for (int i = 0; i < n3d; i++) { PetscInt row = ind + i; @@ -545,73 +518,48 @@ int SNESSolver::init() { // Depends on 2D fields for (int j = 0; j < n2d; j++) { - const PetscInt col = ind0 + j; + PetscInt col = ind0 + j; ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); CHKERRQ(ierr); } // Star pattern - for (const auto& xyoffset : xyoffsets) { - const int xi = x + xyoffset.first; - const int yi = y + xyoffset.second; + for (const auto& [x_off, y_off] : xy_offsets) { + int xi = x + x_off; + int yi = y + y_off; if ((xi < 0) || (yi < 0) || (xi >= mesh->LocalNx) || (yi >= mesh->LocalNy)) { continue; } - - int ind2 = ROUND(index(xi, yi, z)); - if (ind2 < 0) { - continue; // Boundary point - } - - if (z == 0) { - ind2 += n2d; - } - - // 3D fields on this cell - for (int j = 0; j < n3d; j++) { - const PetscInt col = ind2 + j; - ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - if (ierr != 0) { - output.write("ERROR: {} : ({}, {}) -> ({}, {}) : {} -> {}\n", row, x, - y, xi, yi, ind2, ind2 + n3d - 1); + for (int zi = 0; zi < mesh->LocalNz; ++zi) { + int ind2 = ROUND(index(xi, yi, zi)); + if (ind2 < 0) { + continue; // Boundary point } - CHKERRQ(ierr); - } - } - - int nz = mesh->LocalNz; - if (nz > 1) { - // Multiple points in z - int zp = (z + 1) % nz; + if (z == 0) { + ind2 += n2d; + } - int ind2 = ROUND(index(x, y, zp)); - if (zp == 0) { - ind2 += n2d; - } - for (int j = 0; j < n3d; j++) { - const PetscInt col = ind2 + j; - ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - CHKERRQ(ierr); - } + // 3D fields on this cell + for (int j = 0; j < n3d; j++) { + PetscInt col = ind2 + j; + ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - int zm = (z - 1 + nz) % nz; - ind2 = ROUND(index(x, y, zm)); - if (zm == 0) { - ind2 += n2d; - } - for (int j = 0; j < n3d; j++) { - const PetscInt col = ind2 + j; - ierr = MatSetValues(Jfd, 1, &row, 1, &col, &val, INSERT_VALUES); - CHKERRQ(ierr); + if (ierr != 0) { + output.write("ERROR: {} {} : ({}, {}) -> ({}, {}) : {} -> {}\n", + row, x, y, xi, yi, ind2, ind2 + n3d - 1); + } + CHKERRQ(ierr); + } } } } } } } + // Finished marking non-zero entries output_progress.write("Assembling Jacobian matrix\n"); @@ -620,6 +568,17 @@ int SNESSolver::init() { MatAssemblyBegin(Jfd, MAT_FINAL_ASSEMBLY); MatAssemblyEnd(Jfd, MAT_FINAL_ASSEMBLY); + //The above will probably miss some non-zero entries at process boundaries + //Making sure the colouring matrix is symmetric will in some/all(?) + //of the missing non-zeros + if ((*options)["force_symmetric_coloring"] + .doc("Modifies coloring matrix to force it to be symmetric") + .withDefault(false)) { + Mat Jfd_T; + MatCreateTranspose(Jfd, &Jfd_T); + MatAXPY(Jfd, 1, Jfd_T, DIFFERENT_NONZERO_PATTERN); + } + output_progress.write("Creating Jacobian coloring\n"); updateColoring();