Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
## notes on installation
Some roctx profiling calls live inside OpenFOAM macros, so the relevant compiler
and linker flags must be added for the code to compile successfully. This is done
via the FOAM_EXTRA_* environment variables:

export ROCM4FOAM="path to your rocm installation"
export FOAM_EXTRA_CFLAGS="-DUSE_ROCTX -I${ROCM4FOAM}/roctracer/include/"
export FOAM_EXTRA_CXXFLAGS="-DUSE_ROCTX -I${ROCM4FOAM}/roctracer/include/"
export FOAM_EXTRA_LDFLAGS="${ROCM4FOAM}/lib/libroctx64.so -L${ROCM4FOAM}/lib"

These variables must be set before compiling OpenFOAM.

## About OpenFOAM
OpenFOAM is a free, open source CFD software [released and developed by OpenCFD Ltd since 2004](http://www.openfoam.com/history/).
It has a large user base across most areas of engineering and science, from both commercial and academic organisations.
Expand Down
2 changes: 2 additions & 0 deletions etc/bashrc
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,15 @@ export WM_COMPILE_OPTION=Opt
# ~libz : without libz compression
# ~rpath : without rpath handling [MacOS]
# +openmp : with openmp
# for clang, optionally append the offload arch, e.g. for AMD MI300A: +openmp:gfx942
# ~openmp : without openmp
# +ccache : use ccache
# +xcrun : use xcrun and native compilers [MacOS]
# +strict : more deprecation warnings (may generate *many* warnings)
# ccache=... : ccache command (unquoted, single/double or <> quoted)
# version=... : compiler suffix (eg, version=11 -> gcc-11)
#export WM_COMPILE_CONTROL="+strict"
#export WM_COMPILE_CONTROL="+openmp:gfx942 +link-ld"

# [WM_MPLIB] - MPI implementation:
# = SYSTEMOPENMPI | OPENMPI | SYSTEMMPI | MPI | MPICH | MPICH-GM |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2022-2023 OpenCFD Ltd.
Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
Expand Down Expand Up @@ -148,17 +149,35 @@ void Foam::lduCalculatedProcessorField<Type>::addToInternalField

if (add)
{
#ifdef USE_OMP
const label fCells = faceCells.size();
#pragma omp target teams distribute parallel for if (target:fCells>10000)
for (label elemI = 0; elemI < fCells; elemI++)
{
atomicAccumulator(result[faceCells[elemI]]) += (coeffs[elemI]*vals[elemI]);
}
#else
forAll(faceCells, elemI)
{
result[faceCells[elemI]] += coeffs[elemI]*vals[elemI];
}
#endif
}
else
{
#ifdef USE_OMP
const label fCells = faceCells.size();
#pragma omp target teams distribute parallel for if (target:fCells>10000)
for (label elemI = 0; elemI < fCells; elemI++)
{
atomicAccumulator(result[faceCells[elemI]]) -= (coeffs[elemI]*vals[elemI]);
}
#else
forAll(faceCells, elemI)
{
result[faceCells[elemI]] -= coeffs[elemI]*vals[elemI];
}
#endif
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
-------------------------------------------------------------------------------
Copyright (C) 2011-2013 OpenFOAM Foundation
Copyright (C) 2019-2023 OpenCFD Ltd.
Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
Expand Down Expand Up @@ -44,6 +45,16 @@ SourceFiles
#include "primitiveFieldsFwd.H"
#include "Pstream.H"

#ifdef USE_OMP
#include <omp.h>
#ifndef OMP_UNIFIED_MEMORY_REQUIRED
#define OMP_UNIFIED_MEMORY_REQUIRED
#pragma omp requires unified_shared_memory
#endif

#include "AtomicAccumulator.H"
#endif

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

namespace Foam
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2017-2019 OpenCFD Ltd.
Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
Expand Down Expand Up @@ -39,17 +40,35 @@ void Foam::lduInterfaceField::addToInternalField
{
if (add)
{
#ifdef USE_OMP
const label fCells = faceCells.size();
#pragma omp target teams distribute parallel for if (target:fCells>10000)
for (label elemi = 0; elemi < fCells; elemi++)
{
atomicAccumulator(result[faceCells[elemi]]) += (coeffs[elemi]*vals[elemi]);
}
#else
forAll(faceCells, elemi)
{
result[faceCells[elemi]] += coeffs[elemi]*vals[elemi];
}
#endif
}
else
{
#ifdef USE_OMP
const label fCells = faceCells.size();
#pragma omp target teams distribute parallel for if (target:fCells>10000)
for (label elemi = 0; elemi < fCells; elemi++)
{
atomicAccumulator(result[faceCells[elemi]]) -= (coeffs[elemi]*vals[elemi]);
}
#else
forAll(faceCells, elemi)
{
result[faceCells[elemi]] -= coeffs[elemi]*vals[elemi];
}
#endif
}
}

Expand Down
64 changes: 62 additions & 2 deletions src/OpenFOAM/matrices/lduMatrix/lduMatrix/lduMatrixATmul.C
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ Description
#define OMP_UNIFIED_MEMORY_REQUIRED
#pragma omp requires unified_shared_memory
#endif

#include "AtomicAccumulator.H"
#endif

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
Expand Down Expand Up @@ -90,12 +92,23 @@ void Foam::lduMatrix::Amul

const label nFaces = upper().size();

#ifdef USE_OMP
#pragma omp target teams distribute parallel for thread_limit(32) if (target:nFaces>10000)
for (label face=0; face<nFaces; face++)
{
const label lptr = lPtr[face];
const label uptr = uPtr[face];

atomicAccumulator(ApsiPtr[uptr]) += lowerPtr[face]*psiPtr[lptr];
atomicAccumulator(ApsiPtr[lptr]) += upperPtr[face]*psiPtr[uptr];
}
#else
for (label face=0; face<nFaces; face++)
{
ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
}

#endif
// Update interface interfaces
updateMatrixInterfaces
(
Expand Down Expand Up @@ -157,11 +170,23 @@ void Foam::lduMatrix::Tmul
}

const label nFaces = upper().size();
#ifdef USE_OMP
#pragma omp target teams distribute parallel for if (target:nFaces>10000)
for (label face=0; face<nFaces; face++)
{
const label lptr = lPtr[face];
const label uptr = uPtr[face];

atomicAccumulator(TpsiPtr[uptr]) += upperPtr[face]*psiPtr[lptr];
atomicAccumulator(TpsiPtr[lptr]) += lowerPtr[face]*psiPtr[uptr];
}
#else
for (label face=0; face<nFaces; face++)
{
TpsiPtr[uPtr[face]] += upperPtr[face]*psiPtr[lPtr[face]];
TpsiPtr[lPtr[face]] += lowerPtr[face]*psiPtr[uPtr[face]];
}
#endif

// Update interface interfaces
updateMatrixInterfaces
Expand Down Expand Up @@ -207,11 +232,23 @@ void Foam::lduMatrix::sumA
sumAPtr[cell] = diagPtr[cell];
}

#ifdef USE_OMP
#pragma omp target teams distribute parallel for thread_limit(64) if (target:nFaces>10000)
for (label face=0; face<nFaces; face++)
{
const label uptr = uPtr[face];
const label lptr = lPtr[face];

atomicAccumulator(sumAPtr[uptr]) += lowerPtr[face];
atomicAccumulator(sumAPtr[lptr]) += upperPtr[face];
}
#else
for (label face=0; face<nFaces; face++)
{
sumAPtr[uPtr[face]] += lowerPtr[face];
sumAPtr[lPtr[face]] += upperPtr[face];
}
#endif

// Add the interface internal coefficients to diagonal
// and the interface boundary coefficients to the sum-off-diagonal
Expand Down Expand Up @@ -289,12 +326,23 @@ void Foam::lduMatrix::residual

const label nFaces = upper().size();

#ifdef USE_OMP
#pragma omp target teams distribute parallel for if (target:nFaces>10000)
for (label face=0; face<nFaces; face++)
{
const label lptr = lPtr[face];
const label uptr = uPtr[face];

atomicAccumulator(rAPtr[uptr]) -= lowerPtr[face]*psiPtr[lptr];
atomicAccumulator(rAPtr[lptr]) -= upperPtr[face]*psiPtr[uptr];
}
#else
for (label face=0; face<nFaces; face++)
{
rAPtr[uPtr[face]] -= lowerPtr[face]*psiPtr[lPtr[face]];
rAPtr[lPtr[face]] -= upperPtr[face]*psiPtr[uPtr[face]];
}

#endif
// Update interface interfaces
updateMatrixInterfaces
(
Expand Down Expand Up @@ -340,11 +388,23 @@ Foam::tmp<Foam::scalarField> Foam::lduMatrix::H1() const

const label nFaces = upper().size();

#ifdef USE_OMP
#pragma omp target teams distribute parallel for thread_limit(64) if (target:nFaces>10000)
for (label face=0; face<nFaces; face++)
{
const label lptr = lPtr[face];
const label uptr = uPtr[face];

atomicAccumulator(H1Ptr[uptr]) -= lowerPtr[face];
atomicAccumulator(H1Ptr[lptr]) -= upperPtr[face];
}
#else
for (label face=0; face<nFaces; face++)
{
H1Ptr[uPtr[face]] -= lowerPtr[face];
H1Ptr[lPtr[face]] -= upperPtr[face];
}
#endif
}

return tH1;
Expand Down
29 changes: 29 additions & 0 deletions src/OpenFOAM/matrices/lduMatrix/lduMatrix/lduMatrixOperations.C
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ Description
#define OMP_UNIFIED_MEMORY_REQUIRED
#pragma omp requires unified_shared_memory
#endif

#include "AtomicAccumulator.H"
#endif

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
Expand All @@ -51,11 +53,20 @@ void Foam::lduMatrix::sumDiag()
const labelUList& l = lduAddr().lowerAddr();
const labelUList& u = lduAddr().upperAddr();

#ifdef USE_OMP
#pragma omp target teams distribute parallel for if (target:l.size()>10000)
for (label face=0; face<l.size(); face++)
{
atomicAccumulator(Diag[l[face]]) += Lower[face];
atomicAccumulator(Diag[u[face]]) += Upper[face];
}
#else
for (label face=0; face<l.size(); face++)
{
Diag[l[face]] += Lower[face];
Diag[u[face]] += Upper[face];
}
#endif
}


Expand All @@ -68,11 +79,20 @@ void Foam::lduMatrix::negSumDiag()
const labelUList& l = lduAddr().lowerAddr();
const labelUList& u = lduAddr().upperAddr();

#ifdef USE_OMP
#pragma omp target teams distribute parallel for if (target:l.size()>10000)
for (label face=0; face<l.size(); face++)
{
atomicAccumulator(Diag[l[face]]) -= Lower[face];
atomicAccumulator(Diag[u[face]]) -= Upper[face];
}
#else
for (label face=0; face<l.size(); face++)
{
Diag[l[face]] -= Lower[face];
Diag[u[face]] -= Upper[face];
}
#endif
}


Expand All @@ -87,11 +107,20 @@ void Foam::lduMatrix::sumMagOffDiag
const labelUList& l = lduAddr().lowerAddr();
const labelUList& u = lduAddr().upperAddr();

#ifdef USE_OMP
#pragma omp target teams distribute parallel for if (target:l.size()>10000)
for (label face = 0; face < l.size(); face++)
{
atomicAccumulator(sumOff[u[face]]) += mag(Lower[face]);
atomicAccumulator(sumOff[l[face]]) += mag(Upper[face]);
}
#else
for (label face = 0; face < l.size(); face++)
{
sumOff[u[face]] += mag(Lower[face]);
sumOff[l[face]] += mag(Upper[face]);
}
#endif
}


Expand Down
16 changes: 15 additions & 1 deletion src/OpenFOAM/matrices/lduMatrix/lduMatrix/lduMatrixTemplates.C
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Description
#define OMP_UNIFIED_MEMORY_REQUIRED
#pragma omp requires unified_shared_memory
#endif

#include "AtomicAccumulator.H"
#endif

// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
Expand All @@ -60,11 +62,23 @@ Foam::tmp<Foam::Field<Type>> Foam::lduMatrix::H(const Field<Type>& psi) const

const label nFaces = upper().size();

#ifdef USE_OMP
#pragma omp target teams distribute parallel for thread_limit(64) if (target:nFaces>10000)
for (label face=0; face<nFaces; face++)
{
const label lptr = lPtr[face];
const label uptr = uPtr[face];

atomicAccumulator(HpsiPtr[uptr]) -= lowerPtr[face]*psiPtr[lptr];
atomicAccumulator(HpsiPtr[lptr]) -= upperPtr[face]*psiPtr[uptr];
}
#else
for (label face=0; face<nFaces; face++)
{
HpsiPtr[uPtr[face]] -= lowerPtr[face]*psiPtr[lPtr[face]];
HpsiPtr[lPtr[face]] -= upperPtr[face]*psiPtr[uPtr[face]];
}
#endif
}

return tHpsi;
Expand Down Expand Up @@ -105,7 +119,7 @@ Foam::lduMatrix::faceH(const Field<Type>& psi) const
Upper[face]*psi[u[face]]
- Lower[face]*psi[l[face]];
}

return tfaceHpsi;
}

Expand Down
Loading