diff --git a/CMakeLists.txt b/CMakeLists.txt index 75d1a9da..dd8750b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,7 @@ CPMAddPackage( CPMAddPackage( NAME ViennaRay - VERSION 3.10.1 + VERSION 3.10.2 GIT_REPOSITORY "https://github.com/ViennaTools/ViennaRay" EXCLUDE_FROM_ALL ${VIENNAPS_BUILD_PYTHON} OPTIONS "VIENNARAY_USE_GPU ${VIENNAPS_USE_GPU}") diff --git a/README.md b/README.md index dc60f296..4fd13946 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ ViennaPS is also available on the [Python Package Index (PyPI)](https://pypi.org * macOS (clang) -* Windows (Visual Studio) +* Windows (MSVC) ### System Requirements @@ -55,7 +55,7 @@ ViennaPS is also available on the [Python Package Index (PyPI)](https://pypi.org ### ViennaTools Dependencies (installed automatically) -ViennaPS is part of the ViennaTools ecosystem and depends on several lightweight, header-only ViennaTools libraries. During configuration, CMake will look for them and fetch them automatically as part of the ViennaPS build. No separate installation step is required for these dependencies: +ViennaPS is part of the ViennaTools ecosystem and depends on several lightweight, header-only ViennaTools libraries. During configuration, CMake will fetch them automatically as part of the ViennaPS build. No separate installation step is required for these dependencies: * [ViennaCore](https://github.com/ViennaTools/viennacore) * [ViennaLS](https://github.com/ViennaTools/viennals) @@ -98,6 +98,14 @@ cd ViennaPS pip install . ``` +To build the Python package with **GPU** support, use the install script in `python/scripts` folder. On Linux, e.g., run: +```bash +python3 -m venv .venv # create virtual environment (optional, but recommended) +source .venv/bin/activate # activate virtual environment +python python/scripts/install_ViennaPS.py +``` +A CUDA toolkit and driver compatible with your GPU must be installed on your system to use the GPU functionality. + > Some features of the ViennaPS Python module depend on the ViennaLS Python module. The ViennaLS is installed automatically as a dependency. > Note: A locally built ViennaPS Python module is typically not compatible with the ViennaLS package from PyPI. For details and troubleshooting, see [this guide](https://viennatools.github.io/ViennaPS/inst/troubleshooting.html#python-importerror). diff --git a/examples/DRAMWiggling/DRAMWiggling.cpp b/examples/DRAMWiggling/DRAMWiggling.cpp index f1fd0934..e7f0c2ee 100644 --- a/examples/DRAMWiggling/DRAMWiggling.cpp +++ b/examples/DRAMWiggling/DRAMWiggling.cpp @@ -3,8 +3,6 @@ using namespace viennaps; int main(int argc, char **argv) { - using NumericType = double; - constexpr int D = 3; Logger::setLogLevel(LogLevel::INFO); omp_set_num_threads(12); @@ -27,23 +25,22 @@ int main(int argc, char **argv) { units::Length::setUnit(params.get("lengthUnit")); units::Time::setUnit(params.get("timeUnit")); - constexpr NumericType gridDelta = 0.01 * (1. + 1e-12); - BoundaryType boundaryConds[D] = {BoundaryType::REFLECTIVE_BOUNDARY, + constexpr double gridDelta = 0.01 * (1. + 1e-12); + BoundaryType boundaryConds[3] = {BoundaryType::REFLECTIVE_BOUNDARY, BoundaryType::REFLECTIVE_BOUNDARY, BoundaryType::INFINITE_BOUNDARY}; - auto mask = - SmartPointer>::New(gridDelta, boundaryConds); + auto mask = SmartPointer::New(gridDelta, boundaryConds); mask->setBoundaryPadding(0.1, 0.1); - GDSReader(mask, params.get("gdsFile")).apply(); + GDSReader_double_3(mask, params.get("gdsFile")).apply(); // geometry setup - auto geometry = Domain::New(); + auto geometry = Domain_double_3::New(); auto maskLS = mask->layerToLevelSet(0, 0.0, 0.18); geometry->insertNextLevelSetAsMaterial(maskLS, Material::Mask); - MakePlane(geometry, 0.0, Material::Si, true).apply(); + MakePlane_double_3(geometry, 0.0, Material::Si, true).apply(); - auto modelParams = HBrO2Etching::defaultParameters(); + auto modelParams = HBrO2Etching_double_3::defaultParameters(); modelParams.ionFlux = params.get("ionFlux"); modelParams.etchantFlux = params.get("etchantFlux"); modelParams.passivationFlux = params.get("oxygenFlux"); @@ -52,7 +49,7 @@ int main(int argc, char **argv) { modelParams.Ions.exponent = params.get("ionExponent"); modelParams.Ions.n_l = 200; modelParams.Substrate.B_sp = 0.75; - auto model = SmartPointer>::New(modelParams); + auto model = SmartPointer::New(modelParams); // Advection parameters AdvectionParameters advectionParams; @@ -69,7 +66,7 @@ int main(int argc, char **argv) { coverageParams.tolerance = 1e-5; // Process setup - Process process(geometry, model, params.get("processTime")); + Process_double_3 process(geometry, model, params.get("processTime")); process.setParameters(advectionParams); process.setParameters(rayParams); process.setParameters(coverageParams); diff --git a/examples/DRAMWiggling/DRAMWiggling.py b/examples/DRAMWiggling/DRAMWiggling.py index d6ebb3ae..35df9199 100644 --- a/examples/DRAMWiggling/DRAMWiggling.py +++ b/examples/DRAMWiggling/DRAMWiggling.py @@ -35,9 +35,6 @@ # Add plane ps.MakePlane(geometry, 0.0, ps.Material.Si, True).apply() -# print intermediate output surfaces during the process -ps.Logger.setLogLevel(ps.LogLevel.INFO) - ps.Length.setUnit(params["lengthUnit"]) ps.Time.setUnit(params["timeUnit"]) @@ -49,6 +46,7 @@ modelParams.Ions.sigmaEnergy = params["sigmaEnergy"] modelParams.Ions.exponent = params["ionExponent"] modelParams.Ions.n_l = 200 +modelParams.Substrate.B_sp = 0.75 model = ps.HBrO2Etching(modelParams) coverageParameters = ps.CoverageParameters() @@ -58,9 +56,7 @@ rayTracingParams.raysPerPoint = int(params["raysPerPoint"]) advectionParams = ps.AdvectionParameters() -advectionParams.spatialScheme = ps.util.convertSpatialScheme( - params["spatialScheme"] -) +advectionParams.spatialScheme = ps.util.convertSpatialScheme(params["spatialScheme"]) fluxEngineStr = params["fluxEngine"] fluxEngine = ps.util.convertFluxEngineType(fluxEngineStr) diff --git a/examples/SiGeSelectiveEtching/SiGeEtching.py b/examples/SiGeSelectiveEtching/SiGeEtching.py index 369ff687..1c5a1029 100644 --- a/examples/SiGeSelectiveEtching/SiGeEtching.py +++ b/examples/SiGeSelectiveEtching/SiGeEtching.py @@ -1,7 +1,7 @@ -import viennaps.d2 as psd import viennaps as ps from SiGeStackGeometry import CreateGeometry +ps.setDimension(2) ps.setNumThreads(16) # create initial geometry @@ -71,7 +71,7 @@ ps.Material.SiGe: 0.7, } -model = psd.CF4O2Etching(modelParams) +model = ps.CF4O2Etching(modelParams) parameters = model.getParameters() covParams = ps.CoverageParameters() @@ -85,7 +85,7 @@ advParams.timeStepRatio = 0.2 # process setup -process = psd.Process() +process = ps.Process() process.setDomain(geometry) process.setProcessModel(model) process.setProcessDuration(params["processTime"]) # seconds diff --git a/examples/SiGeSelectiveEtching/SiGeStackGeometry.py b/examples/SiGeSelectiveEtching/SiGeStackGeometry.py index fd885e5a..d502ae07 100644 --- a/examples/SiGeSelectiveEtching/SiGeStackGeometry.py +++ b/examples/SiGeSelectiveEtching/SiGeStackGeometry.py @@ -1,14 +1,13 @@ # Create a SiGe stack geometry with SiO2 mask -import viennaps.d2 as psd import viennaps as ps -import viennals.d2 as lsd from viennals import BooleanOperationEnum +ps.setDimension(2) -def CreateGeometry(paramDict: dict) -> psd.Domain: - domain = psd.Domain() +def CreateGeometry(paramDict: dict) -> ps.Domain: + domain = ps.Domain() totalHeight = paramDict["numLayers"] * paramDict["layerHeight"] extent = ( 2 * paramDict["lateralSpacing"] @@ -25,8 +24,8 @@ def CreateGeometry(paramDict: dict) -> psd.Domain: origin = [0, 0] # substrate plane - plane = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"]) - lsd.MakeGeometry(plane, lsd.Plane(origin, normal)).apply() + plane = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"]) + ps.ls.MakeGeometry(plane, ps.ls.Plane(origin, normal)).apply() domain.insertNextLevelSetAsMaterial( levelSet=plane, material=ps.Material.Si, wrapLowerLevelSet=True ) @@ -34,8 +33,8 @@ def CreateGeometry(paramDict: dict) -> psd.Domain: # alternating layers for i in range(paramDict["numLayers"]): origin[1] += paramDict["layerHeight"] - plane = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"]) - lsd.MakeGeometry(plane, lsd.Plane(origin, normal)).apply() + plane = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"]) + ps.ls.MakeGeometry(plane, ps.ls.Plane(origin, normal)).apply() if i % 2 == 0: domain.insertNextLevelSetAsMaterial(plane, ps.Material.SiGe) else: @@ -44,15 +43,15 @@ def CreateGeometry(paramDict: dict) -> psd.Domain: # SiO2 mask maskPosY = totalHeight + paramDict["maskHeight"] origin[1] = maskPosY - mask = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"]) - lsd.MakeGeometry(mask, lsd.Plane(origin, normal)).apply() + mask = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"]) + ps.ls.MakeGeometry(mask, ps.ls.Plane(origin, normal)).apply() domain.insertNextLevelSetAsMaterial(mask, ps.Material.SiO2) # mask maskPosY = totalHeight + paramDict["maskHeight"] + 5 * paramDict["gridDelta"] origin[1] = maskPosY - etchMask = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"]) - lsd.MakeGeometry(etchMask, lsd.Plane(origin, normal)).apply() + etchMask = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"]) + ps.ls.MakeGeometry(etchMask, ps.ls.Plane(origin, normal)).apply() domain.insertNextLevelSetAsMaterial(etchMask, ps.Material.Mask) # left right space @@ -64,20 +63,20 @@ def CreateGeometry(paramDict: dict) -> psd.Domain: -extent / 2 + paramDict["lateralSpacing"], maskPosY + paramDict["gridDelta"], ] - box = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"]) - lsd.MakeGeometry(box, lsd.Box(minPoint, maxPoint)).apply() + box = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"]) + ps.ls.MakeGeometry(box, ps.ls.Box(minPoint, maxPoint)).apply() domain.applyBooleanOperation(box, BooleanOperationEnum.RELATIVE_COMPLEMENT) minPoint[0] = extent / 2 - paramDict["lateralSpacing"] maxPoint[0] = extent / 2 + paramDict["gridDelta"] - lsd.MakeGeometry(box, lsd.Box(minPoint, maxPoint)).apply() + ps.ls.MakeGeometry(box, ps.ls.Box(minPoint, maxPoint)).apply() domain.applyBooleanOperation(box, BooleanOperationEnum.RELATIVE_COMPLEMENT) xpos = -extent / 2 + paramDict["lateralSpacing"] + paramDict["maskWidth"] for i in range(paramDict["numPillars"]): minPoint[0] = xpos maxPoint[0] = xpos + paramDict["trenchWidthTop"] - lsd.MakeGeometry(box, lsd.Box(minPoint, maxPoint)).apply() + ps.ls.MakeGeometry(box, ps.ls.Box(minPoint, maxPoint)).apply() domain.applyBooleanOperation(box, BooleanOperationEnum.RELATIVE_COMPLEMENT) xpos += paramDict["maskWidth"] + paramDict["trenchWidthTop"] @@ -90,14 +89,14 @@ def CreateGeometry(paramDict: dict) -> psd.Domain: * (paramDict["trenchWidthTop"] - paramDict["trenchWidthBottom"]) / (paramDict["numLayers"] * paramDict["layerHeight"] + paramDict["maskHeight"]) ) - processModel = psd.DirectionalProcess(direction, -1, isoVel, ps.Material.Mask) + processModel = ps.DirectionalProcess(direction, -1, isoVel, ps.Material.Mask) time = ( paramDict["numLayers"] * paramDict["layerHeight"] + paramDict["maskHeight"] + paramDict["overEtch"] ) - psd.Process(domain, processModel, time).apply() + ps.Process(domain, processModel, time).apply() # remove trench etching mask domain.removeTopLevelSet() diff --git a/examples/TEOSTrenchDeposition/multiTEOS.py b/examples/TEOSTrenchDeposition/multiTEOS.py index 07d0656e..388117e9 100644 --- a/examples/TEOSTrenchDeposition/multiTEOS.py +++ b/examples/TEOSTrenchDeposition/multiTEOS.py @@ -53,8 +53,8 @@ process.setParameters(rayParams) process.setProcessDuration(params["processTime"]) -geometry.saveVolumeMesh("MultiTEOS_initial.vtp") +geometry.saveVolumeMesh("MultiTEOS_initial") process.apply() -geometry.saveVolumeMesh("MultiTEOS_final.vtp") +geometry.saveVolumeMesh("MultiTEOS_final") diff --git a/examples/TEOSTrenchDeposition/singleTEOS.py b/examples/TEOSTrenchDeposition/singleTEOS.py index 75330909..48cbc5bd 100644 --- a/examples/TEOSTrenchDeposition/singleTEOS.py +++ b/examples/TEOSTrenchDeposition/singleTEOS.py @@ -51,8 +51,8 @@ process.setParameters(rayParams) process.setProcessDuration(params["processTime"]) -geometry.saveVolumeMesh("SingleTEOS_initial.vtp") +geometry.saveVolumeMesh("SingleTEOS_initial") process.apply() -geometry.saveVolumeMesh("SingleTEOS_final.vtp") +geometry.saveVolumeMesh("SingleTEOS_final") diff --git a/examples/atomicLayerDeposition/atomicLayerDeposition.py b/examples/atomicLayerDeposition/atomicLayerDeposition.py index 5f6c1328..3af2a782 100644 --- a/examples/atomicLayerDeposition/atomicLayerDeposition.py +++ b/examples/atomicLayerDeposition/atomicLayerDeposition.py @@ -43,7 +43,7 @@ ls.MakeGeometry(horiBox, ls.Box(minPoint, maxPoint)).apply() geometry.applyBooleanOperation(horiBox, ls.BooleanOperationEnum.RELATIVE_COMPLEMENT) -geometry.saveVolumeMesh("SingleParticleALD_initial.vtu") +geometry.saveVolumeMesh("SingleParticleALD_initial") geometry.duplicateTopLevelSet(ps.Material.Al2O3) @@ -81,4 +81,4 @@ # MeasureProfile(domain, params.get("gapHeight") / 2.) # .save(params.get("outputFile")); -geometry.saveVolumeMesh("SingleParticleALD_final.vtu") +geometry.saveVolumeMesh("SingleParticleALD_final") diff --git a/examples/blazedGratingsEtching/blazedGratingsEtching.cpp b/examples/blazedGratingsEtching/blazedGratingsEtching.cpp index 5beac354..5b386b21 100644 --- a/examples/blazedGratingsEtching/blazedGratingsEtching.cpp +++ b/examples/blazedGratingsEtching/blazedGratingsEtching.cpp @@ -7,9 +7,8 @@ using namespace viennaps; int main(int argc, char **argv) { - using NumericType = double; - constexpr int D = 2; omp_set_num_threads(16); + Logger::setLogLevel(LogLevel::DEBUG); // Parse the parameters util::Parameters params; @@ -25,10 +24,10 @@ int main(int argc, char **argv) { } } - auto geometry = GenerateMask( - params.get("bumpWidth"), params.get("bumpHeight"), - params.get("numBumps"), params.get("bumpDuty"), - params.get("gridDelta")); + auto geometry = + GenerateMask(params.get("bumpWidth"), params.get("bumpHeight"), + params.get("numBumps"), + params.get("bumpDuty"), params.get("gridDelta")); geometry->saveSurfaceMesh("initial"); AdvectionParameters advParams; @@ -40,9 +39,9 @@ int main(int argc, char **argv) { rayTracingParams.raysPerPoint = params.get("raysPerPoint"); rayTracingParams.smoothingNeighbors = 1; - const NumericType yieldFac = params.get("yieldFactor"); + const double yieldFac = params.get("yieldFactor"); - IBEParameters ibeParams; + IBEParameters ibeParams; ibeParams.materialPlaneWaferRate[Material::SiO2] = 1.0; ibeParams.materialPlaneWaferRate[Material::Mask] = 1. / 11.; ibeParams.exponent = params.get("exponent"); @@ -51,36 +50,44 @@ int main(int argc, char **argv) { ibeParams.cos4Yield.a1 = yieldFac; ibeParams.cos4Yield.a2 = -1.55; ibeParams.cos4Yield.a3 = 0.65; - auto model = SmartPointer>::New(ibeParams); - - Process process(geometry, model); - process.setParameters(advParams); - process.setParameters(rayTracingParams); // ANSGM Etch - NumericType angle = params.get("phi1"); - NumericType angleRad = constants::degToRad(angle); - model->setPrimaryDirection( - Vec3D{-std::sin(angleRad), -std::cos(angleRad), 0}); - ibeParams.tiltAngle = angle; + { + double angle = params.get("phi1"); + double angleRad = constants::degToRad(angle); + ibeParams.tiltAngle = angle; + auto model = SmartPointer::New(ibeParams); + model->setPrimaryDirection( + Vec3Dd{-std::sin(angleRad), -std::cos(angleRad), 0}); + model->setProcessName("ANSGM_Etch"); - process.setProcessDuration(params.get("ANSGM_Depth")); - process.apply(); - geometry->saveSurfaceMesh("ANSGM_Etch"); + Process_double_2(geometry, model, params.get("ANSGM_Depth"), advParams, + rayTracingParams) + .apply(); + geometry->saveSurfaceMesh("ANSGM_Etch"); + } geometry->removeTopLevelSet(); // remove mask geometry->saveSurfaceMesh("ANSGM"); // Blazed Gratings Etch - angle = params.get("phi2"); - angleRad = constants::degToRad(angle); - model->setPrimaryDirection( - Vec3D{-std::sin(angleRad), -std::cos(angleRad), 0}); - ibeParams.tiltAngle = angle; + { + double angle = params.get("phi2"); + double angleRad = constants::degToRad(angle); + ibeParams.tiltAngle = angle; + auto model = SmartPointer::New(ibeParams); + model->setPrimaryDirection( + Vec3Dd{-std::sin(angleRad), -std::cos(angleRad), 0}); + model->setProcessName("BlazedGratings_Etch"); - for (int i = 1; i < 5; ++i) { - process.setProcessDuration(params.get("etchTimeP" + std::to_string(i))); - process.apply(); - geometry->saveSurfaceMesh("BlazedGratingsEtch_P" + std::to_string(i)); + Process_double_2 process(geometry, model); + process.setParameters(advParams); + process.setParameters(rayTracingParams); + + for (int i = 1; i < 5; ++i) { + process.setProcessDuration(params.get("etchTimeP" + std::to_string(i))); + process.apply(); + geometry->saveSurfaceMesh("BlazedGratingsEtch_P" + std::to_string(i)); + } } } diff --git a/examples/blazedGratingsEtching/blazedGratingsEtching.py b/examples/blazedGratingsEtching/blazedGratingsEtching.py index 39264555..fa09aff8 100644 --- a/examples/blazedGratingsEtching/blazedGratingsEtching.py +++ b/examples/blazedGratingsEtching/blazedGratingsEtching.py @@ -70,38 +70,51 @@ ibeParams.cos4Yield.a2 = -1.55 ibeParams.cos4Yield.a3 = 0.65 -model = ps.IonBeamEtching() # ----- ANSGM Etch ----- # -angle = params["phi1"] -direction = [0.0, 0.0, 0.0] -direction[0] = -np.sin(np.deg2rad(angle)) -direction[1] = -np.cos(np.deg2rad(angle)) -ibeParams.tiltAngle = angle -model.setPrimaryDirection(direction) -model.setParameters(ibeParams) - -process = ps.Process(geometry, model, 0.0) -process.setParameters(advectionParams) -process.setParameters(rayTracingParams) - -process.setProcessDuration(params["ANSGM_Depth"]) -process.apply() -geometry.saveSurfaceMesh("ANSGM_Etch", True) +def ANSGM_Etch(): + angle = params["phi1"] + direction = [0.0, 0.0, 0.0] + direction[0] = -np.sin(np.deg2rad(angle)) + direction[1] = -np.cos(np.deg2rad(angle)) + ibeParams.tiltAngle = angle + model = ps.IonBeamEtching(ibeParams, maskMaterials=[]) + model.setPrimaryDirection(direction) + model.setProcessName("ANSGM_Etch") + + process = ps.Process(geometry, model, params["ANSGM_Depth"]) + process.setParameters(advectionParams) + process.setParameters(rayTracingParams) + process.apply() + geometry.saveSurfaceMesh("ANSGM_Etch", True) + + +ANSGM_Etch() # remove mask geometry.removeTopLevelSet() geometry.saveSurfaceMesh("ANSGM", True) + # ------ Blazed Gratins Etch ------ # -angle = params["phi2"] -direction[0] = -np.sin(np.deg2rad(angle)) -direction[1] = -np.cos(np.deg2rad(angle)) -ibeParams.tiltAngle = angle -model.setPrimaryDirection(direction) -model.setParameters(ibeParams) - -for i in range(1, 5): - process.setProcessDuration(params["etchTimeP" + str(i)]) - process.apply() - geometry.saveSurfaceMesh("BlazedGratingsEtch_P" + str(i)) +def BlazedGratings_Etch(): + angle = params["phi2"] + direction = [0.0, 0.0, 0.0] + direction[0] = -np.sin(np.deg2rad(angle)) + direction[1] = -np.cos(np.deg2rad(angle)) + ibeParams.tiltAngle = angle + model = ps.IonBeamEtching(ibeParams, maskMaterials=[]) + model.setPrimaryDirection(direction) + model.setProcessName("BlazedGratings_Etch") + + process = ps.Process(geometry, model) + process.setParameters(advectionParams) + process.setParameters(rayTracingParams) + + for i in range(1, 5): + process.setProcessDuration(params["etchTimeP" + str(i)]) + process.apply() + geometry.saveSurfaceMesh("BlazedGratingsEtch_P" + str(i)) + + +BlazedGratings_Etch() diff --git a/examples/boschProcess/config.txt b/examples/boschProcess/config.txt index 8beb66d2..cc046de4 100644 --- a/examples/boschProcess/config.txt +++ b/examples/boschProcess/config.txt @@ -1,7 +1,7 @@ # Geometry gridDelta = 0.025 xExtent = 3.5 -yExtent = 3.5 +yExtent = 1.5 trenchWidth = 2.0 maskHeight = 0.6 diff --git a/examples/emulation/FinFET.py b/examples/emulation/FinFET.py index 2865f7b2..13c18761 100644 --- a/examples/emulation/FinFET.py +++ b/examples/emulation/FinFET.py @@ -195,9 +195,7 @@ def writeSurface(domain): print("S/D Epitaxy ...", end="", flush=True) domain.duplicateTopLevelSet(ps.Material.SiGe) advectionParams = ps.AdvectionParameters() -advectionParams.spatialScheme = ( - ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER -) +advectionParams.spatialScheme = ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER ps.StencilLocalLaxFriedrichsScalar.setMaxDissipation(1000) material = [ (ps.Material.Si, 1.0), diff --git a/examples/faradayCageEtching/config.txt b/examples/faradayCageEtching/config.txt index 02768747..439993a0 100644 --- a/examples/faradayCageEtching/config.txt +++ b/examples/faradayCageEtching/config.txt @@ -11,4 +11,3 @@ tiltAngle = 60.0 cageAngle = 90.0 etchTime = 7.0 -raysPerPoint = 1000 diff --git a/examples/faradayCageEtching/faradayCageEtching.cpp b/examples/faradayCageEtching/faradayCageEtching.cpp index 5913c461..ff6f1e03 100644 --- a/examples/faradayCageEtching/faradayCageEtching.cpp +++ b/examples/faradayCageEtching/faradayCageEtching.cpp @@ -10,7 +10,6 @@ int main(int argc, char *argv[]) { using NumericType = double; constexpr int D = 3; - ps::Logger::setLogLevel(ps::LogLevel::INTERMEDIATE); omp_set_num_threads(16); // Parse the parameters @@ -46,20 +45,15 @@ int main(int argc, char *argv[]) { cageParams, maskMaterials); ps::AdvectionParameters advectionParams; - advectionParams.spatialScheme = - ps::SpatialScheme::LOCAL_LAX_FRIEDRICHS_1ST_ORDER; - - ps::RayTracingParameters rayParams; - rayParams.raysPerPoint = params.get("raysPerPoint"); + advectionParams.spatialScheme = ps::SpatialScheme::LAX_FRIEDRICHS_1ST_ORDER; // process setup ps::Process process(geometry, model); process.setProcessDuration(params.get("etchTime")); - process.setParameters(rayParams); process.setParameters(advectionParams); // print initial surface - geometry->saveHullMesh("initial.vtp"); + geometry->saveHullMesh("initial"); // run the process process.apply(); diff --git a/examples/faradayCageEtching/faradayCageEtching.py b/examples/faradayCageEtching/faradayCageEtching.py index c8dcd3ae..9556bef7 100644 --- a/examples/faradayCageEtching/faradayCageEtching.py +++ b/examples/faradayCageEtching/faradayCageEtching.py @@ -42,12 +42,16 @@ parameters.ibeParams.tiltAngle = params["tiltAngle"] mask = [ps.Material.Mask] -model = ps.FaradayCageEtching(mask, parameters) +model = ps.FaradayCageEtching(parameters, mask) + +advParams = ps.AdvectionParameters() +advParams.spatialScheme = ps.SpatialScheme.LAX_FRIEDRICHS_1ST_ORDER # process setup process = ps.Process() process.setDomain(geometry) process.setProcessModel(model) +process.setParameters(advParams) process.setProcessDuration(params["etchTime"]) # seconds # print initial surface diff --git a/examples/holeEtching/config.txt b/examples/holeEtching/config.txt index 4af1874e..81f19b0f 100644 --- a/examples/holeEtching/config.txt +++ b/examples/holeEtching/config.txt @@ -32,5 +32,3 @@ spatialScheme=EO_1 temporalScheme=RK3 raysPerPoint=1000 - -outputFile=final_y0p62 diff --git a/examples/holeEtching/holeEtching.cpp b/examples/holeEtching/holeEtching.cpp index b964ca61..831cf822 100644 --- a/examples/holeEtching/holeEtching.cpp +++ b/examples/holeEtching/holeEtching.cpp @@ -37,7 +37,7 @@ int main(int argc, char *argv[]) { 0.0, // holeDepth 0.0, // holeTaperAngle params.get("maskHeight"), params.get("taperAngle"), - HoleShape::QUARTER) + HoleShape::HALF) .apply(); // use pre-defined model SF6O2 etching model @@ -79,6 +79,5 @@ int main(int argc, char *argv[]) { process.apply(); // print final surface - geometry->saveSurfaceMesh(params.get("outputFile"), true, 0.01, - true); + geometry->saveSurfaceMesh("final.vtp", true, 0.01, true); } diff --git a/examples/selectiveEpitaxy/selectiveEpitaxy.py b/examples/selectiveEpitaxy/selectiveEpitaxy.py index 5c38b059..55e3c2b7 100644 --- a/examples/selectiveEpitaxy/selectiveEpitaxy.py +++ b/examples/selectiveEpitaxy/selectiveEpitaxy.py @@ -48,9 +48,7 @@ ) advectionParams = ps.AdvectionParameters() -advectionParams.spatialScheme = ( - ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER -) +advectionParams.spatialScheme = ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER process = ps.Process(geometry, model, params["processTime"]) process.setParameters(advectionParams) diff --git a/gpu/benchmark/CPU_Benchmark.cpp b/gpu/benchmark/CPU_Benchmark.cpp index 257a890c..9c5fa51c 100644 --- a/gpu/benchmark/CPU_Benchmark.cpp +++ b/gpu/benchmark/CPU_Benchmark.cpp @@ -125,6 +125,8 @@ int main() { tracer.setUseRandomSeeds(false); tracer.setParticleType(particle); + std::vector dataLabels = particle->getLocalDataLabels(); + for (int i = 0; i < gridDeltaValues.size(); i++) { std::cout << " Grid Delta: " << gridDeltaValues[i] << "\n"; auto domain = MAKE_GEO(gridDeltaValues[i]); @@ -186,10 +188,11 @@ int main() { tracer.normalizeFlux(fluxResult); fluxResultVec.push_back(std::move(fluxResult)); } - ElementToPointData( - IndexMap(particles), fluxResultVec, pointData, elementKdTree, - diskMesh, surfMesh, domain->getGridDelta() * 2.0f) - .apply(); + ElementToPointData post( + dataLabels, pointData, elementKdTree, diskMesh, surfMesh, + domain->getGridDelta() * 2.0f); + post.setElementDataArrays(std::move(fluxResultVec)); + post.apply(); auto velocities = SmartPointer>::New( std::move(*pointData->getScalarData(fluxLabel))); velocityField->prepare(domain, velocities, 0.); diff --git a/gpu/benchmark/GPU_Benchmark.cpp b/gpu/benchmark/GPU_Benchmark.cpp index 642f43b6..1df0f15b 100644 --- a/gpu/benchmark/GPU_Benchmark.cpp +++ b/gpu/benchmark/GPU_Benchmark.cpp @@ -55,6 +55,9 @@ int main() { } tracer.prepareParticlePrograms(); + std::vector dataLabels = + std::get<0>(particleConfig).dataLabels; + std::cout << "Starting Triangle Benchmark\n"; for (auto gd : gridDeltaValues) { @@ -109,12 +112,14 @@ int main() { // POSTPROCESSING timer.start(); auto pointData = viennals::PointData::New(); + ElementToPointData post( + dataLabels, pointData, elementKdTree, diskMesh, surfMesh, + domain->getGridDelta() * 2.0f); + post.prepare(); tracer.normalizeResults(); + post.setElementDataArrays(tracer.getResults()); + post.convert(); // tracer.downloadResults(); - ElementToPointData( - IndexMap(tracer.getParticles()), tracer.getResults(), pointData, - elementKdTree, diskMesh, surfMesh, domain->getGridDelta() * 2.0f) - .apply(); auto velocities = SmartPointer>::New( std::move(*pointData->getScalarData("flux"))); velocityField->prepare(domain, velocities, 0.); diff --git a/include/viennaps/models/psFaradayCageEtching.hpp b/include/viennaps/models/psFaradayCageEtching.hpp index c8f75465..3a2418f9 100644 --- a/include/viennaps/models/psFaradayCageEtching.hpp +++ b/include/viennaps/models/psFaradayCageEtching.hpp @@ -27,6 +27,28 @@ template struct FaradayCageParameters { }; namespace impl { +template +auto getFaradayCageSourceDirections(NumericType tiltAngle, + NumericType cageAngle) { + NumericType cosTilt = std::cos(tiltAngle * M_PI / 180.); + NumericType sinTilt = std::sin(tiltAngle * M_PI / 180.); + NumericType cage_y = std::cos(cageAngle * M_PI / 180.); + NumericType cage_x = std::sin(cageAngle * M_PI / 180.); + + Vec3D direction1{-cosTilt * cage_x, cosTilt * cage_y, -sinTilt}; + if constexpr (D == 2) + std::swap(direction1[1], direction1[2]); + VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 1: " + + util::arrayToString(direction1)); + + Vec3D direction2{cosTilt * cage_x, -cosTilt * cage_y, -sinTilt}; + if constexpr (D == 2) + std::swap(direction2[1], direction2[2]); + VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 2: " + + util::arrayToString(direction2)); + + return std::make_pair(direction1, direction2); +} template class PeriodicSource : public viennaray::Source { @@ -40,33 +62,10 @@ class PeriodicSource : public viennaray::Source { zPos_(boundingBox[1][D - 1] + 2 * gridDelta), gridDelta_(gridDelta), ee_{2 / (cosinePower + 1)} { - NumericType cage_x = std::cos(cageAngle * M_PI / 180.); - NumericType cage_y = std::sin(cageAngle * M_PI / 180.); - NumericType cosTilt = std::cos(tiltAngle * M_PI / 180.); - NumericType sinTilt = std::sin(tiltAngle * M_PI / 180.); - - Vec3D direction; - direction[0] = -cosTilt * cage_y; - direction[1] = cosTilt * cage_x; - direction[2] = -sinTilt; - if constexpr (D == 2) - std::swap(direction[1], direction[2]); - - VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 1: " + - util::arrayToString(direction)); - - orthoBasis1_ = rayInternal::getOrthonormalBasis(direction); - - direction[0] = cosTilt * cage_y; - direction[1] = -cosTilt * cage_x; - direction[2] = -sinTilt; - if constexpr (D == 2) - std::swap(direction[1], direction[2]); - - VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 2: " + - util::arrayToString(direction)); - - orthoBasis2_ = rayInternal::getOrthonormalBasis(direction); + auto [direction1, direction2] = + getFaradayCageSourceDirections(tiltAngle, cageAngle); + orthoBasis1_ = rayInternal::getOrthonormalBasis(direction1); + orthoBasis2_ = rayInternal::getOrthonormalBasis(direction2); } std::array, 2> @@ -186,17 +185,14 @@ class FaradayCageEtching final : public ProcessModelGPU { const std::vector &maskMaterials) : params_(params), maskMaterials_(maskMaterials) { - NumericType cosTilt = std::cos(params.ibeParams.tiltAngle * M_PIf / 180.f); - NumericType sinTilt = std::sin(params.ibeParams.tiltAngle * M_PIf / 180.f); - NumericType cage_y = std::cos(params.cageAngle * M_PIf / 180.f); - NumericType cage_x = std::sin(params.cageAngle * M_PIf / 180.f); - + auto [direction1, direction2] = + ::viennaps::impl::getFaradayCageSourceDirections( + params.ibeParams.tiltAngle, params.cageAngle); viennaray::gpu::Particle particle1{ .name = "Ion1", .cosineExponent = params.ibeParams.exponent, .useCustomDirection = true, - .direction = - Vec3D{-cage_x * cosTilt, cage_y * cosTilt, -sinTilt}}; + .direction = direction1}; particle1.dataLabels.push_back( ::viennaps::impl::IBESurfaceModel::fluxLabel); if (params.ibeParams.redepositionRate > 0.) { @@ -209,8 +205,7 @@ class FaradayCageEtching final : public ProcessModelGPU { .name = "Ion2", .cosineExponent = params.ibeParams.exponent, .useCustomDirection = true, - .direction = - Vec3D{cage_x * cosTilt, -cage_y * cosTilt, -sinTilt}}; + .direction = direction2}; // NO ADDITIONAL FLUX ARRAY NEEDED, ALL PARTICLES WRITE TO THE SAME this->insertNextParticleType(particle2); diff --git a/include/viennaps/process/psCPUTriangleEngine.hpp b/include/viennaps/process/psCPUTriangleEngine.hpp index 5f5a73ab..7a5bc47d 100644 --- a/include/viennaps/process/psCPUTriangleEngine.hpp +++ b/include/viennaps/process/psCPUTriangleEngine.hpp @@ -18,6 +18,8 @@ class CPUTriangleEngine final : public FluxEngine { using KDTreeType = SmartPointer>>; using MeshType = SmartPointer>; + using PostProcessingType = + ElementToPointData; public: ProcessResult checkInput(ProcessContext &context) override { @@ -27,6 +29,7 @@ class CPUTriangleEngine final : public FluxEngine { VIENNACORE_LOG_WARNING("Invalid process model."); return ProcessResult::INVALID_INPUT; } + model_ = model; return ProcessResult::SUCCESS; } @@ -55,20 +58,17 @@ class CPUTriangleEngine final : public FluxEngine { if (!context.rayTracingParams.useRandomSeeds) rayTracer_.setRngSeed(context.rayTracingParams.rngSeed); - auto model = std::dynamic_pointer_cast>( - context.model); - - if (auto source = model->getSource()) { + if (auto source = model_->getSource()) { rayTracer_.setSource(source); VIENNACORE_LOG_INFO("Using custom source."); } - if (auto primaryDirection = model->getPrimaryDirection()) { + if (auto primaryDirection = model_->getPrimaryDirection()) { VIENNACORE_LOG_INFO("Using primary direction: " + util::arrayToString(primaryDirection.value())); rayTracer_.setPrimaryDirection(primaryDirection.value()); } - model->initializeParticleDataLogs(); + model_->initializeParticleDataLogs(); return ProcessResult::SUCCESS; } @@ -91,8 +91,8 @@ class CPUTriangleEngine final : public FluxEngine { if constexpr (D == 2) { viennaray::LineMesh lineMesh; lineMesh.gridDelta = static_cast(context.domain->getGridDelta()); - lineMesh.lines = surfaceMesh_->lines; - lineMesh.nodes = surfaceMesh_->nodes; + lineMesh.lines = std::move(surfaceMesh_->lines); + lineMesh.nodes = std::move(surfaceMesh_->nodes); lineMesh.minimumExtent = surfaceMesh_->minimumExtent; lineMesh.maximumExtent = surfaceMesh_->maximumExtent; @@ -218,14 +218,11 @@ class CPUTriangleEngine final : public FluxEngine { assert(fluxes != nullptr); fluxes->clear(); - auto model = std::dynamic_pointer_cast>( - context.model); - std::vector> elementFluxes; unsigned particleIdx = 0; - for (auto &particle : model->getParticleTypes()) { - int dataLogSize = model->getParticleLogSize(particleIdx); + for (auto &particle : model_->getParticleTypes()) { + int dataLogSize = model_->getParticleLogSize(particleIdx); if (dataLogSize > 0) { rayTracer_.getDataLog().data.resize(1); rayTracer_.getDataLog().data[0].resize(dataLogSize, 0.); @@ -267,17 +264,18 @@ class CPUTriangleEngine final : public FluxEngine { elementFluxes.push_back(std::move(flux)); } - model->mergeParticleData(rayTracer_.getDataLog(), particleIdx); + model_->mergeParticleData(rayTracer_.getDataLog(), particleIdx); ++particleIdx; } - // map fluxes on points - ElementToPointData( - IndexMap(model->getParticleTypes()), elementFluxes, fluxes, - elementKdTree_, context.diskMesh, surfaceMesh_, + // map fluxes to points + PostProcessingType postProcessing( + model_->getParticleDataLabels(), fluxes, elementKdTree_, + context.diskMesh, surfaceMesh_, context.domain->getGridDelta() * - (context.rayTracingParams.smoothingNeighbors + 1)) - .apply(); + (context.rayTracingParams.smoothingNeighbors + 1)); + postProcessing.setElementDataArrays(std::move(elementFluxes)); + postProcessing.apply(); } static viennaray::TracingData @@ -297,6 +295,7 @@ class CPUTriangleEngine final : public FluxEngine { private: viennaray::TraceTriangle rayTracer_; + SmartPointer> model_; KDTreeType elementKdTree_; MeshType surfaceMesh_; diff --git a/include/viennaps/process/psGPUDiskEngine.hpp b/include/viennaps/process/psGPUDiskEngine.hpp index e85fbfbe..d56418cb 100644 --- a/include/viennaps/process/psGPUDiskEngine.hpp +++ b/include/viennaps/process/psGPUDiskEngine.hpp @@ -41,31 +41,15 @@ class GPUDiskEngine final : public FluxEngine { return ProcessResult::INVALID_INPUT; } + model_ = model; + return ProcessResult::SUCCESS; } ProcessResult initialize(ProcessContext &context) override { - auto model = - std::dynamic_pointer_cast>( - context.model); if (!rayTracerInitialized_) { - // Check for periodic boundary conditions - bool periodicBoundary = false; - if (context.rayTracingParams.ignoreFluxBoundaries) { - rayTracer_.setIgnoreBoundary(true); - } else { - const auto &grid = context.domain->getGrid(); - for (unsigned i = 0; i < D; ++i) { - if (grid.getBoundaryConditions(i) == - viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) { - periodicBoundary = true; - break; - } - } - } - - rayTracer_.setParticleCallableMap(model->getParticleCallableMap()); - rayTracer_.setCallables(model->getCallableFileName(), + rayTracer_.setParticleCallableMap(model_->getParticleCallableMap()); + rayTracer_.setCallables(model_->getCallableFileName(), deviceContext_->modulePath); rayTracer_.setNumberOfRaysPerPoint(context.rayTracingParams.raysPerPoint); rayTracer_.setMaxBoundaryHits(context.rayTracingParams.maxBoundaryHits); @@ -74,15 +58,20 @@ class GPUDiskEngine final : public FluxEngine { rayTracer_.setUseRandomSeeds(context.rayTracingParams.useRandomSeeds); if (!context.rayTracingParams.useRandomSeeds) rayTracer_.setRngSeed(context.rayTracingParams.rngSeed); - rayTracer_.setPeriodicBoundary(periodicBoundary); - rayTracer_.setIgnoreBoundary( - context.rayTracingParams.ignoreFluxBoundaries); - for (auto &particle : model->getParticleTypes()) { + for (auto &particle : model_->getParticleTypes()) { rayTracer_.insertNextParticle(particle); } + + // Check boundary conditions + if (context.rayTracingParams.ignoreFluxBoundaries) { + rayTracer_.setIgnoreBoundary(true); + } else if (context.flags.domainHasPeriodicBoundaries) { + rayTracer_.setPeriodicBoundary(true); + } + rayTracer_.prepareParticlePrograms(); } - rayTracer_.setParameters(model->getProcessDataDPtr()); + rayTracer_.setParameters(model_->getProcessDataDPtr()); rayTracerInitialized_ = true; return ProcessResult::SUCCESS; @@ -135,10 +124,7 @@ class GPUDiskEngine final : public FluxEngine { rayTracer_.setGeometry(diskMeshRay); - auto model = - std::dynamic_pointer_cast>( - context.model); - if (model->useMaterialIds()) { + if (model_->useMaterialIds()) { auto const &materialIds = *diskMesh->getCellData().getScalarData("MaterialIds"); rayTracer_.setMaterialIds(materialIds); @@ -154,13 +140,10 @@ class GPUDiskEngine final : public FluxEngine { SmartPointer> &fluxes) override { this->timer_.start(); - auto model = - std::dynamic_pointer_cast>( - context.model); CudaBuffer d_coverages; // device buffer for coverages if (context.flags.useCoverages) { - auto coverages = model->getSurfaceModel()->getCoverages(); + auto coverages = model_->getSurfaceModel()->getCoverages(); assert(coverages); assert(context.diskMesh); auto numCov = coverages->getScalarDataSize(); @@ -187,7 +170,7 @@ class GPUDiskEngine final : public FluxEngine { // output if (Logger::hasIntermediate()) { if (context.flags.useCoverages) { - auto coverages = model->getSurfaceModel()->getCoverages(); + auto coverages = model_->getSurfaceModel()->getCoverages(); downloadCoverages(d_coverages, context.diskMesh->getCellData(), coverages, context.diskMesh->getNodes().size()); } @@ -254,6 +237,7 @@ class GPUDiskEngine final : public FluxEngine { private: std::shared_ptr deviceContext_; viennaray::gpu::TraceDisk rayTracer_; + SmartPointer> model_; bool rayTracerInitialized_ = false; }; diff --git a/include/viennaps/process/psGPULineEngine.hpp b/include/viennaps/process/psGPULineEngine.hpp index 69fa0010..4eed6957 100644 --- a/include/viennaps/process/psGPULineEngine.hpp +++ b/include/viennaps/process/psGPULineEngine.hpp @@ -50,31 +50,15 @@ class GPULineEngine final : public FluxEngine { return ProcessResult::INVALID_INPUT; } + model_ = model; + return ProcessResult::SUCCESS; } ProcessResult initialize(ProcessContext &context) override { - auto model = - std::dynamic_pointer_cast>( - context.model); if (!rayTracerInitialized_) { - // Check for periodic boundary conditions - bool periodicBoundary = false; - if (context.rayTracingParams.ignoreFluxBoundaries) { - rayTracer_.setIgnoreBoundary(true); - } else { - const auto &grid = context.domain->getGrid(); - for (unsigned i = 0; i < D; ++i) { - if (grid.getBoundaryConditions(i) == - viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) { - periodicBoundary = true; - break; - } - } - } - - rayTracer_.setParticleCallableMap(model->getParticleCallableMap()); - rayTracer_.setCallables(model->getCallableFileName(), + rayTracer_.setParticleCallableMap(model_->getParticleCallableMap()); + rayTracer_.setCallables(model_->getCallableFileName(), deviceContext_->modulePath); rayTracer_.setNumberOfRaysPerPoint(context.rayTracingParams.raysPerPoint); rayTracer_.setMaxBoundaryHits(context.rayTracingParams.maxBoundaryHits); @@ -83,15 +67,20 @@ class GPULineEngine final : public FluxEngine { rayTracer_.setUseRandomSeeds(context.rayTracingParams.useRandomSeeds); if (!context.rayTracingParams.useRandomSeeds) rayTracer_.setRngSeed(context.rayTracingParams.rngSeed); - rayTracer_.setPeriodicBoundary(periodicBoundary); - rayTracer_.setIgnoreBoundary( - context.rayTracingParams.ignoreFluxBoundaries); - for (auto &particle : model->getParticleTypes()) { + for (auto &particle : model_->getParticleTypes()) { rayTracer_.insertNextParticle(particle); } + + // Check boundary conditions + if (context.rayTracingParams.ignoreFluxBoundaries) { + rayTracer_.setIgnoreBoundary(true); + } else if (context.flags.domainHasPeriodicBoundaries) { + rayTracer_.setPeriodicBoundary(true); + } + rayTracer_.prepareParticlePrograms(); } - rayTracer_.setParameters(model->getProcessDataDPtr()); + rayTracer_.setParameters(model_->getProcessDataDPtr()); rayTracerInitialized_ = true; return ProcessResult::SUCCESS; @@ -140,10 +129,7 @@ class GPULineEngine final : public FluxEngine { surfaceMesh_->minimumExtent = lineMesh.minimumExtent; surfaceMesh_->maximumExtent = lineMesh.maximumExtent; - auto model = - std::dynamic_pointer_cast>( - context.model); - if (model->useMaterialIds()) { + if (model_->useMaterialIds()) { auto const &pointMaterialIds = diskMesh->getCellData().getScalarData("MaterialIds"); std::vector lineMaterialIds(surfaceMesh_->lines.size()); @@ -174,9 +160,6 @@ class GPULineEngine final : public FluxEngine { ProcessContext &context, SmartPointer> &fluxes) override { this->timer_.start(); - auto model = - std::dynamic_pointer_cast>( - context.model); std::vector> elementCenters(surfaceMesh_->lines.size()); for (int i = 0; i < surfaceMesh_->lines.size(); ++i) { @@ -194,7 +177,7 @@ class GPULineEngine final : public FluxEngine { auto diskMesh = *context.diskMesh; CudaBuffer d_coverages; // device buffer for coverages if (context.flags.useCoverages) { - auto coverages = model->getSurfaceModel()->getCoverages(); + auto coverages = model_->getSurfaceModel()->getCoverages(); assert(coverages); assert(context.diskMesh); auto numCov = coverages->getScalarDataSize(); @@ -238,7 +221,7 @@ class GPULineEngine final : public FluxEngine { // output if (Logger::hasIntermediate()) { if (context.flags.useCoverages) { - auto coverages = model->getSurfaceModel()->getCoverages(); + auto coverages = model_->getSurfaceModel()->getCoverages(); downloadCoverages(d_coverages, context.diskMesh->getCellData(), coverages, context.diskMesh->getNodes().size()); } @@ -377,6 +360,7 @@ class GPULineEngine final : public FluxEngine { private: std::shared_ptr deviceContext_; viennaray::gpu::TraceLine rayTracer_; + SmartPointer> model_; KDTreeType elementKdTree_; MeshType surfaceMesh_; diff --git a/include/viennaps/process/psGPUTriangleEngine.hpp b/include/viennaps/process/psGPUTriangleEngine.hpp index be7dad1f..8fc095a6 100644 --- a/include/viennaps/process/psGPUTriangleEngine.hpp +++ b/include/viennaps/process/psGPUTriangleEngine.hpp @@ -15,6 +15,8 @@ #include +#include + namespace viennaps { using namespace viennacore; @@ -24,10 +26,16 @@ class GPUTriangleEngine final : public FluxEngine { using KDTreeType = SmartPointer>>; using MeshType = SmartPointer>; + using PostProcessingType = + ElementToPointData; public: explicit GPUTriangleEngine(std::shared_ptr deviceContext) - : deviceContext_(deviceContext), rayTracer_(deviceContext) {} + : deviceContext_(deviceContext), rayTracer_(deviceContext) { + surfaceMesh_ = MeshType::New(); + elementKdTree_ = KDTreeType::New(); + } ProcessResult checkInput(ProcessContext &context) override { @@ -50,31 +58,16 @@ class GPUTriangleEngine final : public FluxEngine { return ProcessResult::INVALID_INPUT; } + model_ = model; + return ProcessResult::SUCCESS; } ProcessResult initialize(ProcessContext &context) override { - auto model = - std::dynamic_pointer_cast>( - context.model); + assert(model_ && "Model not set."); if (!rayTracerInitialized_) { - // Check for periodic boundary conditions - bool periodicBoundary = false; - if (context.rayTracingParams.ignoreFluxBoundaries) { - rayTracer_.setIgnoreBoundary(true); - } else { - const auto &grid = context.domain->getGrid(); - for (unsigned i = 0; i < D; ++i) { - if (grid.getBoundaryConditions(i) == - viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) { - periodicBoundary = true; - break; - } - } - } - - rayTracer_.setParticleCallableMap(model->getParticleCallableMap()); - rayTracer_.setCallables(model->getCallableFileName(), + rayTracer_.setParticleCallableMap(model_->getParticleCallableMap()); + rayTracer_.setCallables(model_->getCallableFileName(), deviceContext_->modulePath); rayTracer_.setNumberOfRaysPerPoint(context.rayTracingParams.raysPerPoint); rayTracer_.setUseRandomSeeds(context.rayTracingParams.useRandomSeeds); @@ -83,15 +76,20 @@ class GPUTriangleEngine final : public FluxEngine { rayTracer_.setMaxReflections(context.rayTracingParams.maxReflections); if (!context.rayTracingParams.useRandomSeeds) rayTracer_.setRngSeed(context.rayTracingParams.rngSeed); - rayTracer_.setPeriodicBoundary(periodicBoundary); - rayTracer_.setIgnoreBoundary( - context.rayTracingParams.ignoreFluxBoundaries); - for (auto &particle : model->getParticleTypes()) { + for (auto &particle : model_->getParticleTypes()) { rayTracer_.insertNextParticle(particle); } + + // Check boundary conditions + if (context.rayTracingParams.ignoreFluxBoundaries) { + rayTracer_.setIgnoreBoundary(true); + } else if (context.flags.domainHasPeriodicBoundaries) { + rayTracer_.setPeriodicBoundary(true); + } + rayTracer_.prepareParticlePrograms(); } - rayTracer_.setParameters(model->getProcessDataDPtr()); + rayTracer_.setParameters(model_->getProcessDataDPtr()); rayTracerInitialized_ = true; return ProcessResult::SUCCESS; @@ -100,11 +98,9 @@ class GPUTriangleEngine final : public FluxEngine { ProcessResult updateSurface(ProcessContext &context) override { this->timer_.start(); - if (!surfaceMesh_) - surfaceMesh_ = MeshType::New(); - if (!elementKdTree_) - elementKdTree_ = KDTreeType::New(); + assert(surfaceMesh_ && "Surface mesh not initialized."); + assert(elementKdTree_ && "Element KDTree not initialized."); CreateSurfaceMesh( context.domain->getSurface(), surfaceMesh_, elementKdTree_, 1e-12, context.rayTracingParams.minNodeDistanceFactor) @@ -116,8 +112,8 @@ class GPUTriangleEngine final : public FluxEngine { if constexpr (D == 2) { viennaray::LineMesh lineMesh; lineMesh.gridDelta = static_cast(context.domain->getGridDelta()); - lineMesh.lines = surfaceMesh_->lines; - lineMesh.nodes = surfaceMesh_->nodes; + lineMesh.lines = std::move(surfaceMesh_->lines); + lineMesh.nodes = std::move(surfaceMesh_->nodes); lineMesh.minimumExtent = surfaceMesh_->minimumExtent; lineMesh.maximumExtent = surfaceMesh_->maximumExtent; assert(!lineMesh.lines.empty()); @@ -156,10 +152,7 @@ class GPUTriangleEngine final : public FluxEngine { surfaceMesh_->maximumExtent = triangleMesh.maximumExtent; } - auto model = - std::dynamic_pointer_cast>( - context.model); - if (model->useMaterialIds()) { + if (model_->useMaterialIds()) { auto const &pointMaterialIds = *context.diskMesh->getCellData().getScalarData("MaterialIds"); std::vector elementMaterialIds; @@ -190,13 +183,10 @@ class GPUTriangleEngine final : public FluxEngine { SmartPointer> &fluxes) override { this->timer_.start(); - auto model = - std::dynamic_pointer_cast>( - context.model); CudaBuffer d_coverages; // device buffer for coverages if (context.flags.useCoverages) { - auto coverages = model->getSurfaceModel()->getCoverages(); + auto coverages = model_->getSurfaceModel()->getCoverages(); assert(coverages); assert(context.diskMesh); assert(context.translationField); @@ -217,22 +207,24 @@ class GPUTriangleEngine final : public FluxEngine { } // run the ray tracer - rayTracer_.apply(); - rayTracer_.normalizeResults(); - - // extract fluxes on points - ElementToPointData( - IndexMap(rayTracer_.getParticles()), rayTracer_.getResults(), fluxes, - elementKdTree_, context.diskMesh, surfaceMesh_, + rayTracer_.apply(); // device detach point here + + // Prepare post-processing + PostProcessingType postProcessing( + model_->getParticleDataLabels(), fluxes, elementKdTree_, + context.diskMesh, surfaceMesh_, context.domain->getGridDelta() * - (context.rayTracingParams.smoothingNeighbors + 1)) - .apply(); + (context.rayTracingParams.smoothingNeighbors + 1)); + postProcessing.prepare(); + + rayTracer_.normalizeResults(); // device sync point here + postProcessing.setElementDataArrays(rayTracer_.getResults()); + postProcessing.convert(); // run post-processing // output if (Logger::hasIntermediate()) { if (context.flags.useCoverages) { - auto coverages = model->getSurfaceModel()->getCoverages(); + auto coverages = model_->getSurfaceModel()->getCoverages(); downloadCoverages(d_coverages, surfaceMesh_->getCellData(), coverages, surfaceMesh_->getElements<3>().size()); } @@ -299,6 +291,7 @@ class GPUTriangleEngine final : public FluxEngine { private: std::shared_ptr deviceContext_; viennaray::gpu::TraceTriangle rayTracer_; + SmartPointer> model_; KDTreeType elementKdTree_; MeshType surfaceMesh_; diff --git a/include/viennaps/process/psProcess.hpp b/include/viennaps/process/psProcess.hpp index b2039d4d..154e3886 100644 --- a/include/viennaps/process/psProcess.hpp +++ b/include/viennaps/process/psProcess.hpp @@ -86,13 +86,9 @@ template class Process { void apply() { - if (!checkInput()) + if (!checkInputUpdateContext()) return; - // Update context with current state - context_.updateFlags(); - context_.printFlags(); - // Find appropriate strategy auto strategy = findStrategy(); if (!strategy) { @@ -120,10 +116,9 @@ template class Process { } SmartPointer> calculateFlux() { - if (!checkInput()) + if (!checkInputUpdateContext()) return nullptr; - context_.updateFlags(); const auto name = context_.getProcessName(); if (!context_.flags.useFluxEngine) { VIENNACORE_LOG_ERROR("Process model '" + name + @@ -267,7 +262,7 @@ template class Process { #endif public: - bool checkInput() { + bool checkInputUpdateContext() { if (!context_.domain) { VIENNACORE_LOG_ERROR("No domain passed to Process."); return false; @@ -283,10 +278,19 @@ template class Process { return false; } + // Update context with current state + context_.updateFlags(); + context_.printFlags(); + // Auto-select engine type if (fluxEngineType_ == FluxEngineType::AUTO) { if (gpuAvailable() && context_.model->hasGPUModel()) { - fluxEngineType_ = FluxEngineType::GPU_TRIANGLE; + // Prefer disks if boundary is periodic in any direction + if (context_.flags.domainHasPeriodicBoundaries) { + fluxEngineType_ = FluxEngineType::GPU_DISK; + } else { + fluxEngineType_ = FluxEngineType::GPU_TRIANGLE; + } } else { fluxEngineType_ = FluxEngineType::CPU_DISK; } diff --git a/include/viennaps/process/psProcessContext.hpp b/include/viennaps/process/psProcessContext.hpp index 3de17c7f..87144cae 100644 --- a/include/viennaps/process/psProcessContext.hpp +++ b/include/viennaps/process/psProcessContext.hpp @@ -48,9 +48,12 @@ template struct ProcessContext { bool isALP = false; bool isAnalytic = false; bool isGeometric = false; + bool domainHasPeriodicBoundaries = false; } flags; void updateFlags() { + assert(model && "Process model must be set before updating flags."); + assert(domain && "Domain must be set before updating flags."); flags.isGeometric = model->getGeometricModel() != nullptr; flags.useFluxEngine = model->useFluxEngine(); flags.useAdvectionCallback = model->getAdvectionCallback() != nullptr; @@ -60,6 +63,15 @@ template struct ProcessContext { flags.isAnalytic = model->getVelocityField() && !model->useFluxEngine(); flags.isALP = model->isALPModel(); flags.useCoverages = flags.isALP || flags.useCoverages; + + const auto &grid = domain->getGrid(); + for (unsigned i = 0; i < D; ++i) { + if (grid.getBoundaryConditions(i) == + viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) { + flags.domainHasPeriodicBoundaries = true; + break; + } + } } void printFlags() const { @@ -76,7 +88,9 @@ template struct ProcessContext { << util::boolString(flags.useProcessParams) << "\n\tuseCoverages: " << util::boolString(flags.useCoverages) << "\n\tisALP: " << util::boolString(flags.isALP) - << "\n\tisAnalytic: " << util::boolString(flags.isAnalytic); + << "\n\tisAnalytic: " << util::boolString(flags.isAnalytic) + << "\n\tdomainHasPeriodicBoundaries: " + << util::boolString(flags.domainHasPeriodicBoundaries); if (model) { stream << "\nProcess Name: " << model->getProcessName().value_or("default"); diff --git a/include/viennaps/process/psProcessModel.hpp b/include/viennaps/process/psProcessModel.hpp index e8b7a961..2ed6a471 100644 --- a/include/viennaps/process/psProcessModel.hpp +++ b/include/viennaps/process/psProcessModel.hpp @@ -156,6 +156,17 @@ class ProcessModelCPU : public ProcessModelBase { void setSource(SmartPointer> passedSource) { source = passedSource; } + + auto getParticleDataLabels() const { + std::vector dataLabels; + for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) { + auto labels = particles[pIdx]->getLocalDataLabels(); + for (size_t dIdx = 0; dIdx < labels.size(); dIdx++) { + dataLabels.push_back(labels[dIdx]); + } + } + return dataLabels; + } }; } // namespace viennaps @@ -218,6 +229,16 @@ class ProcessModelGPU : public ProcessModelBase { setPrimaryDirection(const std::array passedPrimaryDirection) { primaryDirection = Normalize(passedPrimaryDirection); } + + auto getParticleDataLabels() const { + std::vector dataLabels; + for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) { + for (size_t dIdx = 0; dIdx < particles[pIdx].dataLabels.size(); dIdx++) { + dataLabels.push_back(particles[pIdx].dataLabels[dIdx]); + } + } + return dataLabels; + } }; } // namespace viennaps::gpu diff --git a/include/viennaps/psElementToPointData.hpp b/include/viennaps/psElementToPointData.hpp index ece162ce..68564665 100644 --- a/include/viennaps/psElementToPointData.hpp +++ b/include/viennaps/psElementToPointData.hpp @@ -13,95 +13,43 @@ namespace viennaps { using namespace viennacore; -class IndexMap { - std::vector dataLabels; - -public: - IndexMap() = default; - -#ifdef VIENNACORE_COMPILE_GPU - template - explicit IndexMap(const std::vector> &particles) { - for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) { - for (size_t dIdx = 0; dIdx < particles[pIdx].dataLabels.size(); dIdx++) { - dataLabels.push_back(particles[pIdx].dataLabels[dIdx]); - } - } - } -#endif - - template - explicit IndexMap( - const std::vector>> - &particles) { - for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) { - auto labels = particles[pIdx]->getLocalDataLabels(); - for (size_t dIdx = 0; dIdx < labels.size(); dIdx++) { - dataLabels.push_back(labels[dIdx]); - } - } - } - - void insertNextDataLabel(std::string dataLabel) { - dataLabels.push_back(std::move(dataLabel)); - } - - std::size_t getIndex(const std::string &label) const { - for (std::size_t idx = 0; idx < dataLabels.size(); idx++) { - if (dataLabels[idx] == label) { - return idx; - } - } - assert(false && "Data label not found"); - return 0; - } - - [[nodiscard]] const std::string &getLabel(std::size_t idx) const { - assert(idx < dataLabels.size()); - return dataLabels[idx]; - } - - std::size_t getNumberOfData() const { return dataLabels.size(); } - - std::vector::const_iterator begin() const { - return dataLabels.cbegin(); - } - - std::vector::const_iterator end() const { - return dataLabels.cend(); - } -}; - template class ElementToPointData { - const IndexMap indexMap_; - std::vector> const &elementDataArrays_; + const std::vector dataLabels_; SmartPointer> pointData_; SmartPointer>> elementKdTree_; SmartPointer> diskMesh_; SmartPointer> surfaceMesh_; const NumericType conversionRadius_; + std::vector> elementDataArrays_; + std::vector, std::vector, unsigned>> + closeElements_; + static constexpr bool discard2 = d2; static constexpr bool discard4 = d4; public: ElementToPointData( - IndexMap indexMap, - std::vector> const &elementDataArrays, - SmartPointer> pointData, + const std::vector &dataLabels, + SmartPointer> + pointData, // target point data SmartPointer>> elementKdTree, SmartPointer> diskMesh, SmartPointer> surfMesh, const NumericType conversionRadius) - : indexMap_(std::move(indexMap)), elementDataArrays_(elementDataArrays), - pointData_(pointData), elementKdTree_(elementKdTree), - diskMesh_(diskMesh), surfaceMesh_(surfMesh), - conversionRadius_(conversionRadius) {} + : dataLabels_(dataLabels), pointData_(pointData), + elementKdTree_(elementKdTree), diskMesh_(diskMesh), + surfaceMesh_(surfMesh), conversionRadius_(conversionRadius) {} - void apply() { - const auto numData = indexMap_.getNumberOfData(); + void setElementDataArrays( + std::vector> &&elementDataArrays) { + elementDataArrays_ = std::move(elementDataArrays); + } + + void prepare() { + const auto numData = dataLabels_.size(); const auto &points = diskMesh_->nodes; const auto numPoints = points.size(); const auto numElements = elementKdTree_->getNumberOfPoints(); @@ -110,16 +58,12 @@ class ElementToPointData { // prepare point data container pointData_->clear(); - for (const auto &label : indexMap_) { + for (const auto &label : dataLabels_) { std::vector data(numPoints, 0.); pointData_->insertNextScalarData(std::move(data), label); } - if (elementDataArrays_.size() != numData) { - VIENNACORE_LOG_ERROR( - "ElementToPointData: " - "Number of data arrays does not match expected count."); - } + closeElements_.resize(numPoints); #pragma omp parallel for schedule(static) for (unsigned i = 0; i < numPoints; i++) { @@ -130,6 +74,7 @@ class ElementToPointData { .value(); std::vector weights(closeElements.size(), 0.); + std::vector elementIndices(closeElements.size(), 0); // compute weights based on normal alignment unsigned numClosePoints = 0; @@ -142,47 +87,57 @@ class ElementToPointData { if (weight > 1e-6 && !std::isnan(weight)) { weights[n] = weight; + elementIndices[n] = p.first; ++numClosePoints; } } - std::size_t nearestIdx = 0; - if (numClosePoints == 0) { // fallback to nearest point - auto nearestPoint = elementKdTree_->findNearest(points[i]); - nearestIdx = nearestPoint->first; - } + assert(!weights.empty() && !elementIndices.empty()); + closeElements_[i] = + std::make_tuple(elementIndices, weights, numClosePoints); + } + } + + void convert() { + const auto numData = dataLabels_.size(); + const auto &points = diskMesh_->nodes; + const auto numPoints = points.size(); + + if (elementDataArrays_.size() != numData) { + VIENNACORE_LOG_ERROR( + "ElementToPointData: " + "Number of data arrays does not match expected count."); + } + +#pragma omp parallel for schedule(static) + for (unsigned i = 0; i < numPoints; i++) { + + const auto &[closeElements, weights, numClosePoints] = closeElements_[i]; for (unsigned j = 0; j < numData; ++j) { NumericType value = NumericType(0); const auto &elementData = elementDataArrays_[j]; auto pointData = pointData_->getScalarData(j); - if (numClosePoints > 0) { - - // Discard outlier values if enabled - const auto weightsCopy = discardOutliers(weights, closeElements, - elementData, numClosePoints); + // Discard outlier values if enabled + const auto weightsCopy = discardOutliers(weights, closeElements, + elementData, numClosePoints); - // Compute weighted average - const double sum = - std::accumulate(weightsCopy.cbegin(), weightsCopy.cend(), 0.0); + // Compute weighted average + const double sum = + std::accumulate(weightsCopy.cbegin(), weightsCopy.cend(), 0.0); - if (sum > 1e-6) { - for (std::size_t k = 0; k < closeElements.size(); ++k) { - if (weightsCopy[k] > 0.0) { - value += weightsCopy[k] * elementData[closeElements[k].first]; - } + if (sum > 1e-6) { + for (size_t k = 0; k < closeElements.size(); ++k) { + if (weightsCopy[k] > 0.0) { + value += weightsCopy[k] * elementData[closeElements[k]]; } - value /= sum; - } else { - // Fallback if all weights were discarded - auto nearestPoint = elementKdTree_->findNearest(points[i]); - value = elementData[nearestPoint->first]; } + value /= sum; } else { - // Fallback to nearest point - assert(numClosePoints == 0); - value = elementData[nearestIdx]; + // Fallback if all weights were discarded + auto nearestPoint = elementKdTree_->findNearest(points[i]); + value = elementData[nearestPoint->first]; } pointData->at(i) = value; @@ -190,6 +145,11 @@ class ElementToPointData { } } + void apply() { + prepare(); + convert(); + } + private: // Helper function to find min/max values and their indices struct MinMaxInfo { @@ -202,15 +162,15 @@ class ElementToPointData { minIdx1(-1), minIdx2(-1), maxIdx1(-1), maxIdx2(-1) {} }; - static MinMaxInfo findMinMaxValues( - const std::vector &weights, - const std::vector> &closePoints, - const std::vector &elementData) { + static MinMaxInfo + findMinMaxValues(const std::vector &weights, + const std::vector &closePoints, + const std::vector &elementData) { MinMaxInfo info; for (std::size_t k = 0; k < closePoints.size(); ++k) { if (weights[k] > 0.0) { - const auto value = elementData[closePoints[k].first]; + const auto value = elementData[closePoints[k]]; // Update min values if (value < info.min1) { @@ -239,10 +199,10 @@ class ElementToPointData { return info; } - static auto discardOutliers( - const std::vector &weights, - const std::vector> &closePoints, - const std::vector &elementData, unsigned numClosePoints) { + static auto discardOutliers(const std::vector &weights, + const std::vector &closePoints, + const std::vector &elementData, + unsigned numClosePoints) { // copy weights to modify auto weightsCopy = weights; diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 53fca00e..4ad51788 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -86,10 +86,14 @@ if(VIENNAPS_USE_GPU) add_dependencies(${VIENNAPS_PYTHON_MODULE_NAME} ${VIENNAPS_GPU_DEPENDENCIES}) # Set PTX path override for runtime kernel loading from site-packages//ptx. - # ABSOLUTE path enforced with dynamic module path. - target_compile_definitions( - ${VIENNAPS_PYTHON_MODULE_NAME} PRIVATE VIENNACORE_KERNELS_PATH_OVERRIDE=ptx - VIENNACORE_DYNAMIC_MODULE_PATH=1) + # ABSOLUTE path enforced with dynamic module path. Only works on Linux based systems. + # On Windows the dynamic path is relative to the python3xx.dll location which is not ideal. + # So the PTX files are used from the build directory on Windows. + if(NOT MSVC) + target_compile_definitions( + ${VIENNAPS_PYTHON_MODULE_NAME} PRIVATE VIENNACORE_KERNELS_PATH_OVERRIDE=ptx + VIENNACORE_DYNAMIC_MODULE_PATH=1) + endif() # Install PTX files install(DIRECTORY "${VIENNACORE_NVCC_PTX_DIR}/" DESTINATION "${VIENNAPS_PYTHON_PACKAGE_NAME}/ptx") diff --git a/python/pyWrap.cpp b/python/pyWrap.cpp index 67bdafd0..47328e16 100644 --- a/python/pyWrap.cpp +++ b/python/pyWrap.cpp @@ -19,11 +19,12 @@ struct MaterialInfoPy { PYBIND11_MODULE(VIENNAPS_MODULE_NAME, module) { module.doc() = - "ViennaPS is a header-only C++ process simulation library which " - "includes surface and volume representations, a ray tracer, and physical " - "models for the simulation of microelectronic fabrication processes. The " - "main design goals are simplicity and efficiency, tailored towards " - "scientific simulations."; + "ViennaPS is a topography simulation library for microelectronic " + "fabrication processes. It models the evolution of 2D and 3D surfaces " + "during etching, deposition, and related steps, combining advanced " + "level-set methods for surface evolution with Monte Carlo ray tracing " + "for flux calculation. This allows accurate, feature-scale simulation of " + "complex fabrication geometries."; // set version string of python module module.attr("__version__") = versionString(); diff --git a/python/scripts/install_ViennaPS_linux.py b/python/scripts/install_ViennaPS.py similarity index 54% rename from python/scripts/install_ViennaPS_linux.py rename to python/scripts/install_ViennaPS.py index 36b2010e..67f740bc 100644 --- a/python/scripts/install_ViennaPS_linux.py +++ b/python/scripts/install_ViennaPS.py @@ -1,37 +1,46 @@ #!/usr/bin/env python3 import argparse import os +import platform import shutil import subprocess import sys import tempfile from pathlib import Path -REQUIRED_GCC = "12" REQUIRED_NVCC_MAJOR = 12 DEFAULT_VIENNALS_VERSION = "5.4.0" +# Detect OS +IS_WINDOWS = sys.platform == "win32" or os.name == "nt" +IS_LINUX = sys.platform.startswith("linux") +OS_NAME = platform.system() + +# Global variable to store required GCC version (determined at runtime) +REQUIRED_GCC = None + def run(cmd, **kwargs): - print("+", " ".join(cmd)) + print("+", " ".join(str(c) for c in cmd)) return subprocess.run(cmd, check=True, **kwargs) def run_capture(cmd, **kwargs): - print("+", " ".join(cmd)) + print("+", " ".join(str(c) for c in cmd)) return subprocess.run( cmd, check=True, stdout=subprocess.PIPE, text=True, **kwargs ).stdout.strip() -def which_or_fail(name): +def which_or_fail(name: str) -> str: p = shutil.which(name) if not p: - sys.exit(f"{name} is required but not found in PATH.") + sys.exit(f"{name} is required but was not found in PATH.") return p def parse_nvcc_version(): + """Parse nvcc version and return (major, minor, full_string).""" out = run_capture(["nvcc", "--version"]) # look for "release 12.3" etc. for line in out.splitlines(): @@ -39,29 +48,155 @@ def parse_nvcc_version(): part = line.split("release", 1)[1].strip().split(",")[0].strip() # part like "12.3" or "12.0" try: - major = int(part.split(".")[0]) - return major, part + version_parts = part.split(".") + major = int(version_parts[0]) + minor = int(version_parts[1]) if len(version_parts) > 1 else 0 + return major, minor, part except Exception: break sys.exit("Could not parse nvcc version. Need CUDA >= 12.0.") -def ensure_git(): - which_or_fail("git") +def determine_required_gcc_version( + nvcc_major: int, nvcc_minor: int +) -> list[str] | None: + """ + Determine the acceptable GCC versions based on nvcc version. + + Rules: + - nvcc < 12.4: gcc 11 or 12 + - 12.4 <= nvcc < 12.8: gcc 11, 12, or 13 + - nvcc >= 12.8: any gcc version is ok (return None) + + Returns: + List of acceptable GCC versions (e.g., ["11", "12"]) or None if any version is ok. + """ + nvcc_version = nvcc_major * 10 + nvcc_minor + + if nvcc_version < 124: + return ["11", "12"] + elif nvcc_version < 128: + return ["11", "12", "13"] + else: + return None # Any GCC version is ok + + +def get_default_gcc_version() -> tuple[int, int] | None: + """ + Get the default gcc version (major, minor). + Returns None if gcc is not found or version cannot be parsed. + """ + gcc_path = shutil.which("gcc") + if not gcc_path: + return None + + try: + out = run_capture(["gcc", "--version"]) + # First line typically contains version, e.g., "gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0" + first_line = out.splitlines()[0] + # Look for version pattern like "11.4.0" + import re + + match = re.search(r"(\d+)\.(\d+)", first_line) + if match: + major = int(match.group(1)) + minor = int(match.group(2)) + return major, minor + except Exception: + pass + + return None def ensure_compilers(): - which_or_fail(f"gcc-{REQUIRED_GCC}") - which_or_fail(f"g++-{REQUIRED_GCC}") + """ + Check for required compilers based on OS. + """ + if IS_WINDOWS: + ensure_msvc() + else: + ensure_gcc_and_gpp() + + +def ensure_gcc_and_gpp(): + """ + Check that GCC and G++ are available (Linux). + If specific versions are acceptable based on nvcc, ensure at least one is available. + """ + global REQUIRED_GCC + + if REQUIRED_GCC is None: + # No specific version required, just check that gcc exists + which_or_fail("gcc") + which_or_fail("g++") + default_gcc = get_default_gcc_version() + if default_gcc: + print(f"Using default GCC version: {default_gcc[0]}.{default_gcc[1]}") + else: + print("Using default GCC version (version detection failed)") + else: + # Check if any of the acceptable versions is available + found_version = None + for gcc_ver in REQUIRED_GCC: + if shutil.which(f"gcc-{gcc_ver}") and shutil.which(f"g++-{gcc_ver}"): + found_version = gcc_ver + break + + if found_version is None: + # None of the versioned compilers found, check if default gcc is compatible + default_gcc = get_default_gcc_version() + if default_gcc and str(default_gcc[0]) in REQUIRED_GCC: + found_version = str(default_gcc[0]) + print( + f"Using default GCC-{found_version} (compatible with CUDA version)" + ) + else: + sys.exit( + f"ERROR: None of the required GCC versions {REQUIRED_GCC} found.\n" + f"Please install one of: " + + ", ".join([f"gcc-{v}/g++-{v}" for v in REQUIRED_GCC]) + ) + else: + print(f"Using GCC-{found_version} (compatible with CUDA version)") + # Update REQUIRED_GCC to the single version we're using + REQUIRED_GCC = found_version + + +def ensure_msvc(): + """ + Check that an MSVC toolchain is available (Windows). + """ + print("Checking MSVC toolchain...") + cl_path = shutil.which("cl") + if not cl_path: + print( + "WARNING: 'cl.exe' was not found in PATH.\n" + "Please run this script from a Visual Studio Developer Command Prompt\n" + "(e.g. 'x64 Native Tools Command Prompt for VS 2022')." + ) + # Do not hard-fail here; building will likely fail later anyway. + else: + print(f"Found MSVC compiler: {cl_path}") def ensure_cuda(): + """Check CUDA availability and determine required GCC version on Linux.""" + global REQUIRED_GCC + which_or_fail("nvcc") - major, full = parse_nvcc_version() + major, minor, full = parse_nvcc_version() if major < REQUIRED_NVCC_MAJOR: sys.exit(f"CUDA toolkit version 12.0 or higher is required (found {full}).") print(f"CUDA toolkit version: {full}") + # On Linux, determine required GCC version based on nvcc version + if IS_LINUX: + REQUIRED_GCC = determine_required_gcc_version(major, minor) + if REQUIRED_GCC: + print(f"CUDA {full} is compatible with GCC: {', '.join(REQUIRED_GCC)}") + else: + print(f"CUDA {full} works with any GCC version") + def create_or_reuse_venv(venv_dir: Path): if venv_dir.exists(): @@ -93,14 +228,17 @@ def install_viennals( pip_path: Path, viennals_dir: Path | None, required_version: str, verbose: bool ): env = os.environ.copy() - env["CC"] = f"gcc-{REQUIRED_GCC}" - env["CXX"] = f"g++-{REQUIRED_GCC}" + + # On Linux, set CC and CXX environment variables + if IS_LINUX and REQUIRED_GCC is not None: + env["CC"] = f"gcc-{REQUIRED_GCC}" + env["CXX"] = f"g++-{REQUIRED_GCC}" current = pip_show_version(pip_path, "ViennaLS") if current is None: print("ViennaLS not installed. A local build is required.") if viennals_dir is None: - ensure_git() + which_or_fail("git") # git is required to clone print(f"Cloning ViennaLS v{required_version}…") with tempfile.TemporaryDirectory(prefix="ViennaLS_tmp_install_") as tmp: tmp_path = Path(tmp) @@ -129,12 +267,12 @@ def install_viennals( run(cmd, cwd=viennals_dir, env=env) else: print( - f"ViennaLS already installed ({current}). Local build is required and version should be {required_version}." + f"ViennaLS already installed ({current}). " + f"A local build is required and the version should be {required_version}." ) if current != required_version: - sys.exit( - f"Version mismatch. Please change to the required version {required_version}.\n" - "Then re-run this script." + print( + "WARNING: Installed ViennaLS version does not match the required version." ) print("Proceeding with the currently installed ViennaLS.") @@ -168,7 +306,6 @@ def get_viennaps_dir(viennaps_dir_arg: str | None) -> Path: def install_viennaps( pip_path: Path, viennaps_dir: Path, - optix_dir: Path | None, debug_build: bool, gpu_build: bool, verbose: bool, @@ -179,36 +316,43 @@ def install_viennaps( sys.exit( f"{viennaps_dir} does not look like a ViennaPS source directory (missing CMakeLists.txt)." ) + env = os.environ.copy() - env["CC"] = f"gcc-{REQUIRED_GCC}" - env["CXX"] = f"g++-{REQUIRED_GCC}" - # GPU on - cmake_args = [] - if gpu_build: - cmake_args = ["-DVIENNAPS_USE_GPU=ON"] + # On Linux, set CC and CXX environment variables + if IS_LINUX and REQUIRED_GCC is not None: + env["CC"] = f"gcc-{REQUIRED_GCC}" + env["CXX"] = f"g++-{REQUIRED_GCC}" - if debug_build: - print("Enabling debug build.") - env["CMAKE_BUILD_TYPE"] = "Debug" - env["SKBUILD_CMAKE_BUILD_TYPE"] = "Debug" + cmake_args: list[str] = [] - if optix_dir is not None: - env["OptiX_INSTALL_DIR"] = str(optix_dir) + # GPU on/off + if gpu_build: + cmake_args.append("-DVIENNAPS_USE_GPU=ON") + else: + cmake_args.append("-DVIENNAPS_USE_GPU=OFF") env["CMAKE_ARGS"] = " ".join(cmake_args) + cmd = [str(pip_path), "install", "--no-deps", "."] + + if debug_build: + print("Enabling debug build.") + cmd.append("--config-settings=cmake.build-type=Debug") + if verbose: cmd.append("-v") + + # Run the installation run(cmd, cwd=viennaps_dir, env=env) def main(): parser = argparse.ArgumentParser( - description="Dev setup for ViennaPS with GPU support (Linux)." + description=f"Dev setup for ViennaPS with GPU support ({OS_NAME})." ) parser.add_argument( - "--verbose", action="store_true", help="Enable verbose pip builds." + "--verbose", "-v", action="store_true", help="Enable verbose pip builds." ) parser.add_argument( "--venv", default=".venv", help="Path to the virtual environment directory." @@ -228,11 +372,6 @@ def main(): default=None, help="Path to ViennaPS directory (defaults to current dir if it's ViennaPS).", ) - parser.add_argument( - "--optix", - default=None, - help="Path to OptiX installation directory (optional - will auto-download OptiX headers if not provided).", - ) parser.add_argument( "--debug-build", action="store_true", @@ -252,31 +391,8 @@ def main(): if not args.skip_toolchain_check or not args.no_gpu: print("Checking toolchain...") - ensure_compilers() ensure_cuda() - - # OptiX dir - if not args.no_gpu: - optix_dir = args.optix or os.environ.get("OptiX_INSTALL_DIR") - if not optix_dir: - print("No OptiX directory provided. Will auto-download OptiX headers.") - print("\nWARNING: OptiX uses a different license than ViennaPS.") - print( - "By proceeding with auto-download, you agree to the NVIDIA OptiX license terms." - ) - print( - "Please review the OptiX license at: https://developer.nvidia.com/designworks/sdk-samples-tools-software-license-agreement" - ) - print("If you do not agree, abort now (Ctrl+C).") - input("Press Enter to continue...") - optix_dir = None - else: - optix_dir = Path(optix_dir).expanduser().resolve() - if not optix_dir.exists(): - sys.exit(f"OptiX directory not found: {optix_dir}") - print(f"Using OptiX at: {optix_dir}") - else: - optix_dir = None + ensure_compilers() # venv venv_dir = Path(args.venv).expanduser().resolve() @@ -289,14 +405,11 @@ def main(): ) install_viennals(venv_pip, viennals_dir, args.viennals_version, args.verbose) - # ViennaPS dir + # ViennaPS viennaps_dir = get_viennaps_dir(args.viennaps_dir) - - # ViennaPS install install_viennaps( venv_pip, viennaps_dir, - optix_dir, args.debug_build, not args.no_gpu, args.verbose,