diff --git a/CMakeLists.txt b/CMakeLists.txt
index 75d1a9da..dd8750b8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -117,7 +117,7 @@ CPMAddPackage(
 
 CPMAddPackage(
   NAME ViennaRay
-  VERSION 3.10.1
+  VERSION 3.10.2
   GIT_REPOSITORY "https://github.com/ViennaTools/ViennaRay"
   EXCLUDE_FROM_ALL ${VIENNAPS_BUILD_PYTHON}
   OPTIONS "VIENNARAY_USE_GPU ${VIENNAPS_USE_GPU}")
diff --git a/README.md b/README.md
index dc60f296..4fd13946 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ ViennaPS is also available on the [Python Package Index (PyPI)](https://pypi.org
 
 * macOS (clang)
 
-* Windows (Visual Studio)
+* Windows (MSVC)
 
 ### System Requirements
 
@@ -55,7 +55,7 @@ ViennaPS is also available on the [Python Package Index (PyPI)](https://pypi.org
 
 ### ViennaTools Dependencies (installed automatically)
 
-ViennaPS is part of the ViennaTools ecosystem and depends on several lightweight, header-only ViennaTools libraries. During configuration, CMake will look for them and fetch them automatically as part of the ViennaPS build. No separate installation step is required for these dependencies:
+ViennaPS is part of the ViennaTools ecosystem and depends on several lightweight, header-only ViennaTools libraries. During configuration, CMake will fetch them automatically as part of the ViennaPS build. No separate installation step is required for these dependencies:
 
 * [ViennaCore](https://github.com/ViennaTools/viennacore) 
 * [ViennaLS](https://github.com/ViennaTools/viennals) 
@@ -98,6 +98,14 @@ cd ViennaPS
 pip install .
 ```
 
+To build the Python package with **GPU** support, use the install script in `python/scripts` folder. On Linux, e.g., run:
+```bash
+python3 -m venv .venv # create virtual environment (optional, but recommended)
+source .venv/bin/activate # activate virtual environment 
+python python/scripts/install_ViennaPS.py
+```
+A CUDA toolkit and driver compatible with your GPU must be installed on your system to use the GPU functionality.
+
 > Some features of the ViennaPS Python module depend on the ViennaLS Python module. The ViennaLS is installed automatically as a dependency.
 > Note: A locally built ViennaPS Python module is typically not compatible with the ViennaLS package from PyPI. For details and troubleshooting, see [this guide](https://viennatools.github.io/ViennaPS/inst/troubleshooting.html#python-importerror).
 
diff --git a/examples/DRAMWiggling/DRAMWiggling.cpp b/examples/DRAMWiggling/DRAMWiggling.cpp
index f1fd0934..e7f0c2ee 100644
--- a/examples/DRAMWiggling/DRAMWiggling.cpp
+++ b/examples/DRAMWiggling/DRAMWiggling.cpp
@@ -3,8 +3,6 @@
 using namespace viennaps;
 
 int main(int argc, char **argv) {
-  using NumericType = double;
-  constexpr int D = 3;
 
   Logger::setLogLevel(LogLevel::INFO);
   omp_set_num_threads(12);
@@ -27,23 +25,22 @@ int main(int argc, char **argv) {
   units::Length::setUnit(params.get<std::string>("lengthUnit"));
   units::Time::setUnit(params.get<std::string>("timeUnit"));
 
-  constexpr NumericType gridDelta = 0.01 * (1. + 1e-12);
-  BoundaryType boundaryConds[D] = {BoundaryType::REFLECTIVE_BOUNDARY,
+  constexpr double gridDelta = 0.01 * (1. + 1e-12);
+  BoundaryType boundaryConds[3] = {BoundaryType::REFLECTIVE_BOUNDARY,
                                    BoundaryType::REFLECTIVE_BOUNDARY,
                                    BoundaryType::INFINITE_BOUNDARY};
 
-  auto mask =
-      SmartPointer<GDSGeometry<NumericType, D>>::New(gridDelta, boundaryConds);
+  auto mask = SmartPointer<GDSGeometry_double_3>::New(gridDelta, boundaryConds);
   mask->setBoundaryPadding(0.1, 0.1);
-  GDSReader<NumericType, D>(mask, params.get<std::string>("gdsFile")).apply();
+  GDSReader_double_3(mask, params.get<std::string>("gdsFile")).apply();
 
   // geometry setup
-  auto geometry = Domain<NumericType, D>::New();
+  auto geometry = Domain_double_3::New();
   auto maskLS = mask->layerToLevelSet(0, 0.0, 0.18);
   geometry->insertNextLevelSetAsMaterial(maskLS, Material::Mask);
-  MakePlane<NumericType, D>(geometry, 0.0, Material::Si, true).apply();
+  MakePlane_double_3(geometry, 0.0, Material::Si, true).apply();
 
-  auto modelParams = HBrO2Etching<NumericType, D>::defaultParameters();
+  auto modelParams = HBrO2Etching_double_3::defaultParameters();
   modelParams.ionFlux = params.get("ionFlux");
   modelParams.etchantFlux = params.get("etchantFlux");
   modelParams.passivationFlux = params.get("oxygenFlux");
@@ -52,7 +49,7 @@ int main(int argc, char **argv) {
   modelParams.Ions.exponent = params.get("ionExponent");
   modelParams.Ions.n_l = 200;
   modelParams.Substrate.B_sp = 0.75;
-  auto model = SmartPointer<HBrO2Etching<NumericType, D>>::New(modelParams);
+  auto model = SmartPointer<HBrO2Etching_double_3>::New(modelParams);
 
   // Advection parameters
   AdvectionParameters advectionParams;
@@ -69,7 +66,7 @@ int main(int argc, char **argv) {
   coverageParams.tolerance = 1e-5;
 
   // Process setup
-  Process<NumericType, D> process(geometry, model, params.get("processTime"));
+  Process_double_3 process(geometry, model, params.get("processTime"));
   process.setParameters(advectionParams);
   process.setParameters(rayParams);
   process.setParameters(coverageParams);
diff --git a/examples/DRAMWiggling/DRAMWiggling.py b/examples/DRAMWiggling/DRAMWiggling.py
index d6ebb3ae..35df9199 100644
--- a/examples/DRAMWiggling/DRAMWiggling.py
+++ b/examples/DRAMWiggling/DRAMWiggling.py
@@ -35,9 +35,6 @@
 # Add plane
 ps.MakePlane(geometry, 0.0, ps.Material.Si, True).apply()
 
-# print intermediate output surfaces during the process
-ps.Logger.setLogLevel(ps.LogLevel.INFO)
-
 ps.Length.setUnit(params["lengthUnit"])
 ps.Time.setUnit(params["timeUnit"])
 
@@ -49,6 +46,7 @@
 modelParams.Ions.sigmaEnergy = params["sigmaEnergy"]
 modelParams.Ions.exponent = params["ionExponent"]
 modelParams.Ions.n_l = 200
+modelParams.Substrate.B_sp = 0.75
 model = ps.HBrO2Etching(modelParams)
 
 coverageParameters = ps.CoverageParameters()
@@ -58,9 +56,7 @@
 rayTracingParams.raysPerPoint = int(params["raysPerPoint"])
 
 advectionParams = ps.AdvectionParameters()
-advectionParams.spatialScheme = ps.util.convertSpatialScheme(
-    params["spatialScheme"]
-)
+advectionParams.spatialScheme = ps.util.convertSpatialScheme(params["spatialScheme"])
 
 fluxEngineStr = params["fluxEngine"]
 fluxEngine = ps.util.convertFluxEngineType(fluxEngineStr)
diff --git a/examples/SiGeSelectiveEtching/SiGeEtching.py b/examples/SiGeSelectiveEtching/SiGeEtching.py
index 369ff687..1c5a1029 100644
--- a/examples/SiGeSelectiveEtching/SiGeEtching.py
+++ b/examples/SiGeSelectiveEtching/SiGeEtching.py
@@ -1,7 +1,7 @@
-import viennaps.d2 as psd
 import viennaps as ps
 from SiGeStackGeometry import CreateGeometry
 
+ps.setDimension(2)
 ps.setNumThreads(16)
 
 # create initial geometry
@@ -71,7 +71,7 @@
     ps.Material.SiGe: 0.7,
 }
 
-model = psd.CF4O2Etching(modelParams)
+model = ps.CF4O2Etching(modelParams)
 parameters = model.getParameters()
 
 covParams = ps.CoverageParameters()
@@ -85,7 +85,7 @@
 advParams.timeStepRatio = 0.2
 
 # process setup
-process = psd.Process()
+process = ps.Process()
 process.setDomain(geometry)
 process.setProcessModel(model)
 process.setProcessDuration(params["processTime"])  # seconds
diff --git a/examples/SiGeSelectiveEtching/SiGeStackGeometry.py b/examples/SiGeSelectiveEtching/SiGeStackGeometry.py
index fd885e5a..d502ae07 100644
--- a/examples/SiGeSelectiveEtching/SiGeStackGeometry.py
+++ b/examples/SiGeSelectiveEtching/SiGeStackGeometry.py
@@ -1,14 +1,13 @@
 # Create a SiGe stack geometry with SiO2 mask
 
-import viennaps.d2 as psd
 import viennaps as ps
-import viennals.d2 as lsd
 from viennals import BooleanOperationEnum
 
+ps.setDimension(2)
 
-def CreateGeometry(paramDict: dict) -> psd.Domain:
-    domain = psd.Domain()
 
+def CreateGeometry(paramDict: dict) -> ps.Domain:
+    domain = ps.Domain()
     totalHeight = paramDict["numLayers"] * paramDict["layerHeight"]
     extent = (
         2 * paramDict["lateralSpacing"]
@@ -25,8 +24,8 @@ def CreateGeometry(paramDict: dict) -> psd.Domain:
     origin = [0, 0]
 
     # substrate plane
-    plane = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"])
-    lsd.MakeGeometry(plane, lsd.Plane(origin, normal)).apply()
+    plane = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"])
+    ps.ls.MakeGeometry(plane, ps.ls.Plane(origin, normal)).apply()
     domain.insertNextLevelSetAsMaterial(
         levelSet=plane, material=ps.Material.Si, wrapLowerLevelSet=True
     )
@@ -34,8 +33,8 @@ def CreateGeometry(paramDict: dict) -> psd.Domain:
     # alternating layers
     for i in range(paramDict["numLayers"]):
         origin[1] += paramDict["layerHeight"]
-        plane = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"])
-        lsd.MakeGeometry(plane, lsd.Plane(origin, normal)).apply()
+        plane = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"])
+        ps.ls.MakeGeometry(plane, ps.ls.Plane(origin, normal)).apply()
         if i % 2 == 0:
             domain.insertNextLevelSetAsMaterial(plane, ps.Material.SiGe)
         else:
@@ -44,15 +43,15 @@ def CreateGeometry(paramDict: dict) -> psd.Domain:
     # SiO2 mask
     maskPosY = totalHeight + paramDict["maskHeight"]
     origin[1] = maskPosY
-    mask = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"])
-    lsd.MakeGeometry(mask, lsd.Plane(origin, normal)).apply()
+    mask = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"])
+    ps.ls.MakeGeometry(mask, ps.ls.Plane(origin, normal)).apply()
     domain.insertNextLevelSetAsMaterial(mask, ps.Material.SiO2)
 
     # mask
     maskPosY = totalHeight + paramDict["maskHeight"] + 5 * paramDict["gridDelta"]
     origin[1] = maskPosY
-    etchMask = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"])
-    lsd.MakeGeometry(etchMask, lsd.Plane(origin, normal)).apply()
+    etchMask = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"])
+    ps.ls.MakeGeometry(etchMask, ps.ls.Plane(origin, normal)).apply()
     domain.insertNextLevelSetAsMaterial(etchMask, ps.Material.Mask)
 
     # left right space
@@ -64,20 +63,20 @@ def CreateGeometry(paramDict: dict) -> psd.Domain:
         -extent / 2 + paramDict["lateralSpacing"],
         maskPosY + paramDict["gridDelta"],
     ]
-    box = lsd.Domain(bounds, boundaryConds, paramDict["gridDelta"])
-    lsd.MakeGeometry(box, lsd.Box(minPoint, maxPoint)).apply()
+    box = ps.ls.Domain(bounds, boundaryConds, paramDict["gridDelta"])
+    ps.ls.MakeGeometry(box, ps.ls.Box(minPoint, maxPoint)).apply()
     domain.applyBooleanOperation(box, BooleanOperationEnum.RELATIVE_COMPLEMENT)
 
     minPoint[0] = extent / 2 - paramDict["lateralSpacing"]
     maxPoint[0] = extent / 2 + paramDict["gridDelta"]
-    lsd.MakeGeometry(box, lsd.Box(minPoint, maxPoint)).apply()
+    ps.ls.MakeGeometry(box, ps.ls.Box(minPoint, maxPoint)).apply()
     domain.applyBooleanOperation(box, BooleanOperationEnum.RELATIVE_COMPLEMENT)
 
     xpos = -extent / 2 + paramDict["lateralSpacing"] + paramDict["maskWidth"]
     for i in range(paramDict["numPillars"]):
         minPoint[0] = xpos
         maxPoint[0] = xpos + paramDict["trenchWidthTop"]
-        lsd.MakeGeometry(box, lsd.Box(minPoint, maxPoint)).apply()
+        ps.ls.MakeGeometry(box, ps.ls.Box(minPoint, maxPoint)).apply()
         domain.applyBooleanOperation(box, BooleanOperationEnum.RELATIVE_COMPLEMENT)
         xpos += paramDict["maskWidth"] + paramDict["trenchWidthTop"]
 
@@ -90,14 +89,14 @@ def CreateGeometry(paramDict: dict) -> psd.Domain:
         * (paramDict["trenchWidthTop"] - paramDict["trenchWidthBottom"])
         / (paramDict["numLayers"] * paramDict["layerHeight"] + paramDict["maskHeight"])
     )
-    processModel = psd.DirectionalProcess(direction, -1, isoVel, ps.Material.Mask)
+    processModel = ps.DirectionalProcess(direction, -1, isoVel, ps.Material.Mask)
 
     time = (
         paramDict["numLayers"] * paramDict["layerHeight"]
         + paramDict["maskHeight"]
         + paramDict["overEtch"]
     )
-    psd.Process(domain, processModel, time).apply()
+    ps.Process(domain, processModel, time).apply()
 
     # remove trench etching mask
     domain.removeTopLevelSet()
diff --git a/examples/TEOSTrenchDeposition/multiTEOS.py b/examples/TEOSTrenchDeposition/multiTEOS.py
index 07d0656e..388117e9 100644
--- a/examples/TEOSTrenchDeposition/multiTEOS.py
+++ b/examples/TEOSTrenchDeposition/multiTEOS.py
@@ -53,8 +53,8 @@
 process.setParameters(rayParams)
 process.setProcessDuration(params["processTime"])
 
-geometry.saveVolumeMesh("MultiTEOS_initial.vtp")
+geometry.saveVolumeMesh("MultiTEOS_initial")
 
 process.apply()
 
-geometry.saveVolumeMesh("MultiTEOS_final.vtp")
+geometry.saveVolumeMesh("MultiTEOS_final")
diff --git a/examples/TEOSTrenchDeposition/singleTEOS.py b/examples/TEOSTrenchDeposition/singleTEOS.py
index 75330909..48cbc5bd 100644
--- a/examples/TEOSTrenchDeposition/singleTEOS.py
+++ b/examples/TEOSTrenchDeposition/singleTEOS.py
@@ -51,8 +51,8 @@
 process.setParameters(rayParams)
 process.setProcessDuration(params["processTime"])
 
-geometry.saveVolumeMesh("SingleTEOS_initial.vtp")
+geometry.saveVolumeMesh("SingleTEOS_initial")
 
 process.apply()
 
-geometry.saveVolumeMesh("SingleTEOS_final.vtp")
+geometry.saveVolumeMesh("SingleTEOS_final")
diff --git a/examples/atomicLayerDeposition/atomicLayerDeposition.py b/examples/atomicLayerDeposition/atomicLayerDeposition.py
index 5f6c1328..3af2a782 100644
--- a/examples/atomicLayerDeposition/atomicLayerDeposition.py
+++ b/examples/atomicLayerDeposition/atomicLayerDeposition.py
@@ -43,7 +43,7 @@
 ls.MakeGeometry(horiBox, ls.Box(minPoint, maxPoint)).apply()
 geometry.applyBooleanOperation(horiBox, ls.BooleanOperationEnum.RELATIVE_COMPLEMENT)
 
-geometry.saveVolumeMesh("SingleParticleALD_initial.vtu")
+geometry.saveVolumeMesh("SingleParticleALD_initial")
 
 geometry.duplicateTopLevelSet(ps.Material.Al2O3)
 
@@ -81,4 +81,4 @@
 #   MeasureProfile<NumericType, D>(domain, params.get("gapHeight") / 2.)
 #       .save(params.get<std::string>("outputFile"));
 
-geometry.saveVolumeMesh("SingleParticleALD_final.vtu")
+geometry.saveVolumeMesh("SingleParticleALD_final")
diff --git a/examples/blazedGratingsEtching/blazedGratingsEtching.cpp b/examples/blazedGratingsEtching/blazedGratingsEtching.cpp
index 5beac354..5b386b21 100644
--- a/examples/blazedGratingsEtching/blazedGratingsEtching.cpp
+++ b/examples/blazedGratingsEtching/blazedGratingsEtching.cpp
@@ -7,9 +7,8 @@
 using namespace viennaps;
 
 int main(int argc, char **argv) {
-  using NumericType = double;
-  constexpr int D = 2;
   omp_set_num_threads(16);
+  Logger::setLogLevel(LogLevel::DEBUG);
 
   // Parse the parameters
   util::Parameters params;
@@ -25,10 +24,10 @@ int main(int argc, char **argv) {
     }
   }
 
-  auto geometry = GenerateMask<NumericType, D>(
-      params.get("bumpWidth"), params.get("bumpHeight"),
-      params.get<int>("numBumps"), params.get("bumpDuty"),
-      params.get("gridDelta"));
+  auto geometry =
+      GenerateMask<double, 2>(params.get("bumpWidth"), params.get("bumpHeight"),
+                              params.get<int>("numBumps"),
+                              params.get("bumpDuty"), params.get("gridDelta"));
   geometry->saveSurfaceMesh("initial");
 
   AdvectionParameters advParams;
@@ -40,9 +39,9 @@ int main(int argc, char **argv) {
   rayTracingParams.raysPerPoint = params.get<unsigned>("raysPerPoint");
   rayTracingParams.smoothingNeighbors = 1;
 
-  const NumericType yieldFac = params.get("yieldFactor");
+  const double yieldFac = params.get("yieldFactor");
 
-  IBEParameters<NumericType> ibeParams;
+  IBEParameters<double> ibeParams;
   ibeParams.materialPlaneWaferRate[Material::SiO2] = 1.0;
   ibeParams.materialPlaneWaferRate[Material::Mask] = 1. / 11.;
   ibeParams.exponent = params.get("exponent");
@@ -51,36 +50,44 @@ int main(int argc, char **argv) {
   ibeParams.cos4Yield.a1 = yieldFac;
   ibeParams.cos4Yield.a2 = -1.55;
   ibeParams.cos4Yield.a3 = 0.65;
-  auto model = SmartPointer<IonBeamEtching<NumericType, D>>::New(ibeParams);
-
-  Process<NumericType, D> process(geometry, model);
-  process.setParameters(advParams);
-  process.setParameters(rayTracingParams);
 
   // ANSGM Etch
-  NumericType angle = params.get("phi1");
-  NumericType angleRad = constants::degToRad(angle);
-  model->setPrimaryDirection(
-      Vec3D<NumericType>{-std::sin(angleRad), -std::cos(angleRad), 0});
-  ibeParams.tiltAngle = angle;
+  {
+    double angle = params.get("phi1");
+    double angleRad = constants::degToRad(angle);
+    ibeParams.tiltAngle = angle;
+    auto model = SmartPointer<IonBeamEtching_double_2>::New(ibeParams);
+    model->setPrimaryDirection(
+        Vec3Dd{-std::sin(angleRad), -std::cos(angleRad), 0});
+    model->setProcessName("ANSGM_Etch");
 
-  process.setProcessDuration(params.get("ANSGM_Depth"));
-  process.apply();
-  geometry->saveSurfaceMesh("ANSGM_Etch");
+    Process_double_2(geometry, model, params.get("ANSGM_Depth"), advParams,
+                     rayTracingParams)
+        .apply();
+    geometry->saveSurfaceMesh("ANSGM_Etch");
+  }
 
   geometry->removeTopLevelSet(); // remove mask
   geometry->saveSurfaceMesh("ANSGM");
 
   // Blazed Gratings Etch
-  angle = params.get("phi2");
-  angleRad = constants::degToRad(angle);
-  model->setPrimaryDirection(
-      Vec3D<NumericType>{-std::sin(angleRad), -std::cos(angleRad), 0});
-  ibeParams.tiltAngle = angle;
+  {
+    double angle = params.get("phi2");
+    double angleRad = constants::degToRad(angle);
+    ibeParams.tiltAngle = angle;
+    auto model = SmartPointer<IonBeamEtching_double_2>::New(ibeParams);
+    model->setPrimaryDirection(
+        Vec3Dd{-std::sin(angleRad), -std::cos(angleRad), 0});
+    model->setProcessName("BlazedGratings_Etch");
 
-  for (int i = 1; i < 5; ++i) {
-    process.setProcessDuration(params.get("etchTimeP" + std::to_string(i)));
-    process.apply();
-    geometry->saveSurfaceMesh("BlazedGratingsEtch_P" + std::to_string(i));
+    Process_double_2 process(geometry, model);
+    process.setParameters(advParams);
+    process.setParameters(rayTracingParams);
+
+    for (int i = 1; i < 5; ++i) {
+      process.setProcessDuration(params.get("etchTimeP" + std::to_string(i)));
+      process.apply();
+      geometry->saveSurfaceMesh("BlazedGratingsEtch_P" + std::to_string(i));
+    }
   }
 }
diff --git a/examples/blazedGratingsEtching/blazedGratingsEtching.py b/examples/blazedGratingsEtching/blazedGratingsEtching.py
index 39264555..fa09aff8 100644
--- a/examples/blazedGratingsEtching/blazedGratingsEtching.py
+++ b/examples/blazedGratingsEtching/blazedGratingsEtching.py
@@ -70,38 +70,51 @@
 ibeParams.cos4Yield.a2 = -1.55
 ibeParams.cos4Yield.a3 = 0.65
 
-model = ps.IonBeamEtching()
 
 # ----- ANSGM Etch ----- #
-angle = params["phi1"]
-direction = [0.0, 0.0, 0.0]
-direction[0] = -np.sin(np.deg2rad(angle))
-direction[1] = -np.cos(np.deg2rad(angle))
-ibeParams.tiltAngle = angle
-model.setPrimaryDirection(direction)
-model.setParameters(ibeParams)
-
-process = ps.Process(geometry, model, 0.0)
-process.setParameters(advectionParams)
-process.setParameters(rayTracingParams)
-
-process.setProcessDuration(params["ANSGM_Depth"])
-process.apply()
-geometry.saveSurfaceMesh("ANSGM_Etch", True)
+def ANSGM_Etch():
+    angle = params["phi1"]
+    direction = [0.0, 0.0, 0.0]
+    direction[0] = -np.sin(np.deg2rad(angle))
+    direction[1] = -np.cos(np.deg2rad(angle))
+    ibeParams.tiltAngle = angle
+    model = ps.IonBeamEtching(ibeParams, maskMaterials=[])
+    model.setPrimaryDirection(direction)
+    model.setProcessName("ANSGM_Etch")
+
+    process = ps.Process(geometry, model, params["ANSGM_Depth"])
+    process.setParameters(advectionParams)
+    process.setParameters(rayTracingParams)
+    process.apply()
+    geometry.saveSurfaceMesh("ANSGM_Etch", True)
+
+
+ANSGM_Etch()
 
 # remove mask
 geometry.removeTopLevelSet()
 geometry.saveSurfaceMesh("ANSGM", True)
 
+
 # ------ Blazed Gratins Etch ------ #
-angle = params["phi2"]
-direction[0] = -np.sin(np.deg2rad(angle))
-direction[1] = -np.cos(np.deg2rad(angle))
-ibeParams.tiltAngle = angle
-model.setPrimaryDirection(direction)
-model.setParameters(ibeParams)
-
-for i in range(1, 5):
-    process.setProcessDuration(params["etchTimeP" + str(i)])
-    process.apply()
-    geometry.saveSurfaceMesh("BlazedGratingsEtch_P" + str(i))
+def BlazedGratings_Etch():
+    angle = params["phi2"]
+    direction = [0.0, 0.0, 0.0]
+    direction[0] = -np.sin(np.deg2rad(angle))
+    direction[1] = -np.cos(np.deg2rad(angle))
+    ibeParams.tiltAngle = angle
+    model = ps.IonBeamEtching(ibeParams, maskMaterials=[])
+    model.setPrimaryDirection(direction)
+    model.setProcessName("BlazedGratings_Etch")
+
+    process = ps.Process(geometry, model)
+    process.setParameters(advectionParams)
+    process.setParameters(rayTracingParams)
+
+    for i in range(1, 5):
+        process.setProcessDuration(params["etchTimeP" + str(i)])
+        process.apply()
+        geometry.saveSurfaceMesh("BlazedGratingsEtch_P" + str(i))
+
+
+BlazedGratings_Etch()
diff --git a/examples/boschProcess/config.txt b/examples/boschProcess/config.txt
index 8beb66d2..cc046de4 100644
--- a/examples/boschProcess/config.txt
+++ b/examples/boschProcess/config.txt
@@ -1,7 +1,7 @@
 # Geometry
 gridDelta = 0.025
 xExtent = 3.5
-yExtent = 3.5
+yExtent = 1.5
 
 trenchWidth = 2.0
 maskHeight = 0.6
diff --git a/examples/emulation/FinFET.py b/examples/emulation/FinFET.py
index 2865f7b2..13c18761 100644
--- a/examples/emulation/FinFET.py
+++ b/examples/emulation/FinFET.py
@@ -195,9 +195,7 @@ def writeSurface(domain):
 print("S/D Epitaxy ...", end="", flush=True)
 domain.duplicateTopLevelSet(ps.Material.SiGe)
 advectionParams = ps.AdvectionParameters()
-advectionParams.spatialScheme = (
-    ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER
-)
+advectionParams.spatialScheme = ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER
 ps.StencilLocalLaxFriedrichsScalar.setMaxDissipation(1000)
 material = [
     (ps.Material.Si, 1.0),
diff --git a/examples/faradayCageEtching/config.txt b/examples/faradayCageEtching/config.txt
index 02768747..439993a0 100644
--- a/examples/faradayCageEtching/config.txt
+++ b/examples/faradayCageEtching/config.txt
@@ -11,4 +11,3 @@ tiltAngle = 60.0
 cageAngle = 90.0
 
 etchTime = 7.0
-raysPerPoint = 1000
diff --git a/examples/faradayCageEtching/faradayCageEtching.cpp b/examples/faradayCageEtching/faradayCageEtching.cpp
index 5913c461..ff6f1e03 100644
--- a/examples/faradayCageEtching/faradayCageEtching.cpp
+++ b/examples/faradayCageEtching/faradayCageEtching.cpp
@@ -10,7 +10,6 @@ int main(int argc, char *argv[]) {
   using NumericType = double;
   constexpr int D = 3;
 
-  ps::Logger::setLogLevel(ps::LogLevel::INTERMEDIATE);
   omp_set_num_threads(16);
 
   // Parse the parameters
@@ -46,20 +45,15 @@ int main(int argc, char *argv[]) {
       cageParams, maskMaterials);
 
   ps::AdvectionParameters advectionParams;
-  advectionParams.spatialScheme =
-      ps::SpatialScheme::LOCAL_LAX_FRIEDRICHS_1ST_ORDER;
-
-  ps::RayTracingParameters rayParams;
-  rayParams.raysPerPoint = params.get<int>("raysPerPoint");
+  advectionParams.spatialScheme = ps::SpatialScheme::LAX_FRIEDRICHS_1ST_ORDER;
 
   // process setup
   ps::Process<NumericType, D> process(geometry, model);
   process.setProcessDuration(params.get("etchTime"));
-  process.setParameters(rayParams);
   process.setParameters(advectionParams);
 
   // print initial surface
-  geometry->saveHullMesh("initial.vtp");
+  geometry->saveHullMesh("initial");
 
   // run the process
   process.apply();
diff --git a/examples/faradayCageEtching/faradayCageEtching.py b/examples/faradayCageEtching/faradayCageEtching.py
index c8dcd3ae..9556bef7 100644
--- a/examples/faradayCageEtching/faradayCageEtching.py
+++ b/examples/faradayCageEtching/faradayCageEtching.py
@@ -42,12 +42,16 @@
 parameters.ibeParams.tiltAngle = params["tiltAngle"]
 mask = [ps.Material.Mask]
 
-model = ps.FaradayCageEtching(mask, parameters)
+model = ps.FaradayCageEtching(parameters, mask)
+
+advParams = ps.AdvectionParameters()
+advParams.spatialScheme = ps.SpatialScheme.LAX_FRIEDRICHS_1ST_ORDER
 
 # process setup
 process = ps.Process()
 process.setDomain(geometry)
 process.setProcessModel(model)
+process.setParameters(advParams)
 process.setProcessDuration(params["etchTime"])  # seconds
 
 # print initial surface
diff --git a/examples/holeEtching/config.txt b/examples/holeEtching/config.txt
index 4af1874e..81f19b0f 100644
--- a/examples/holeEtching/config.txt
+++ b/examples/holeEtching/config.txt
@@ -32,5 +32,3 @@ spatialScheme=EO_1
 temporalScheme=RK3
 
 raysPerPoint=1000
-
-outputFile=final_y0p62
diff --git a/examples/holeEtching/holeEtching.cpp b/examples/holeEtching/holeEtching.cpp
index b964ca61..831cf822 100644
--- a/examples/holeEtching/holeEtching.cpp
+++ b/examples/holeEtching/holeEtching.cpp
@@ -37,7 +37,7 @@ int main(int argc, char *argv[]) {
                            0.0, // holeDepth
                            0.0, // holeTaperAngle
                            params.get("maskHeight"), params.get("taperAngle"),
-                           HoleShape::QUARTER)
+                           HoleShape::HALF)
       .apply();
 
   // use pre-defined model SF6O2 etching model
@@ -79,6 +79,5 @@ int main(int argc, char *argv[]) {
   process.apply();
 
   // print final surface
-  geometry->saveSurfaceMesh(params.get<std::string>("outputFile"), true, 0.01,
-                            true);
+  geometry->saveSurfaceMesh("final.vtp", true, 0.01, true);
 }
diff --git a/examples/selectiveEpitaxy/selectiveEpitaxy.py b/examples/selectiveEpitaxy/selectiveEpitaxy.py
index 5c38b059..55e3c2b7 100644
--- a/examples/selectiveEpitaxy/selectiveEpitaxy.py
+++ b/examples/selectiveEpitaxy/selectiveEpitaxy.py
@@ -48,9 +48,7 @@
 )
 
 advectionParams = ps.AdvectionParameters()
-advectionParams.spatialScheme = (
-    ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER
-)
+advectionParams.spatialScheme = ps.SpatialScheme.STENCIL_LOCAL_LAX_FRIEDRICHS_1ST_ORDER
 
 process = ps.Process(geometry, model, params["processTime"])
 process.setParameters(advectionParams)
diff --git a/gpu/benchmark/CPU_Benchmark.cpp b/gpu/benchmark/CPU_Benchmark.cpp
index 257a890c..9c5fa51c 100644
--- a/gpu/benchmark/CPU_Benchmark.cpp
+++ b/gpu/benchmark/CPU_Benchmark.cpp
@@ -125,6 +125,8 @@ int main() {
     tracer.setUseRandomSeeds(false);
     tracer.setParticleType(particle);
 
+    std::vector<std::string> dataLabels = particle->getLocalDataLabels();
+
     for (int i = 0; i < gridDeltaValues.size(); i++) {
       std::cout << "  Grid Delta: " << gridDeltaValues[i] << "\n";
       auto domain = MAKE_GEO<NumericType>(gridDeltaValues[i]);
@@ -186,10 +188,11 @@ int main() {
           tracer.normalizeFlux(fluxResult);
           fluxResultVec.push_back(std::move(fluxResult));
         }
-        ElementToPointData<NumericType, float, float>(
-            IndexMap(particles), fluxResultVec, pointData, elementKdTree,
-            diskMesh, surfMesh, domain->getGridDelta() * 2.0f)
-            .apply();
+        ElementToPointData<NumericType, float, float> post(
+            dataLabels, pointData, elementKdTree, diskMesh, surfMesh,
+            domain->getGridDelta() * 2.0f);
+        post.setElementDataArrays(std::move(fluxResultVec));
+        post.apply();
         auto velocities = SmartPointer<std::vector<NumericType>>::New(
             std::move(*pointData->getScalarData(fluxLabel)));
         velocityField->prepare(domain, velocities, 0.);
diff --git a/gpu/benchmark/GPU_Benchmark.cpp b/gpu/benchmark/GPU_Benchmark.cpp
index 642f43b6..1df0f15b 100644
--- a/gpu/benchmark/GPU_Benchmark.cpp
+++ b/gpu/benchmark/GPU_Benchmark.cpp
@@ -55,6 +55,9 @@ int main() {
     }
     tracer.prepareParticlePrograms();
 
+    std::vector<std::string> dataLabels =
+        std::get<0>(particleConfig).dataLabels;
+
     std::cout << "Starting Triangle Benchmark\n";
 
     for (auto gd : gridDeltaValues) {
@@ -109,12 +112,14 @@ int main() {
         // POSTPROCESSING
         timer.start();
         auto pointData = viennals::PointData<NumericType>::New();
+        ElementToPointData<NumericType, float, viennaray::gpu::ResultType> post(
+            dataLabels, pointData, elementKdTree, diskMesh, surfMesh,
+            domain->getGridDelta() * 2.0f);
+        post.prepare();
         tracer.normalizeResults();
+        post.setElementDataArrays(tracer.getResults());
+        post.convert();
         // tracer.downloadResults();
-        ElementToPointData<NumericType, float, viennaray::gpu::ResultType>(
-            IndexMap(tracer.getParticles()), tracer.getResults(), pointData,
-            elementKdTree, diskMesh, surfMesh, domain->getGridDelta() * 2.0f)
-            .apply();
         auto velocities = SmartPointer<std::vector<NumericType>>::New(
             std::move(*pointData->getScalarData("flux")));
         velocityField->prepare(domain, velocities, 0.);
diff --git a/include/viennaps/models/psFaradayCageEtching.hpp b/include/viennaps/models/psFaradayCageEtching.hpp
index c8f75465..3a2418f9 100644
--- a/include/viennaps/models/psFaradayCageEtching.hpp
+++ b/include/viennaps/models/psFaradayCageEtching.hpp
@@ -27,6 +27,28 @@ template <typename NumericType> struct FaradayCageParameters {
 };
 
 namespace impl {
+template <typename NumericType, int D>
+auto getFaradayCageSourceDirections(NumericType tiltAngle,
+                                    NumericType cageAngle) {
+  NumericType cosTilt = std::cos(tiltAngle * M_PI / 180.);
+  NumericType sinTilt = std::sin(tiltAngle * M_PI / 180.);
+  NumericType cage_y = std::cos(cageAngle * M_PI / 180.);
+  NumericType cage_x = std::sin(cageAngle * M_PI / 180.);
+
+  Vec3D<NumericType> direction1{-cosTilt * cage_x, cosTilt * cage_y, -sinTilt};
+  if constexpr (D == 2)
+    std::swap(direction1[1], direction1[2]);
+  VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 1: " +
+                       util::arrayToString(direction1));
+
+  Vec3D<NumericType> direction2{cosTilt * cage_x, -cosTilt * cage_y, -sinTilt};
+  if constexpr (D == 2)
+    std::swap(direction2[1], direction2[2]);
+  VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 2: " +
+                       util::arrayToString(direction2));
+
+  return std::make_pair(direction1, direction2);
+}
 
 template <typename NumericType, int D>
 class PeriodicSource : public viennaray::Source<NumericType> {
@@ -40,33 +62,10 @@ class PeriodicSource : public viennaray::Source<NumericType> {
         zPos_(boundingBox[1][D - 1] + 2 * gridDelta), gridDelta_(gridDelta),
         ee_{2 / (cosinePower + 1)} {
 
-    NumericType cage_x = std::cos(cageAngle * M_PI / 180.);
-    NumericType cage_y = std::sin(cageAngle * M_PI / 180.);
-    NumericType cosTilt = std::cos(tiltAngle * M_PI / 180.);
-    NumericType sinTilt = std::sin(tiltAngle * M_PI / 180.);
-
-    Vec3D<NumericType> direction;
-    direction[0] = -cosTilt * cage_y;
-    direction[1] = cosTilt * cage_x;
-    direction[2] = -sinTilt;
-    if constexpr (D == 2)
-      std::swap(direction[1], direction[2]);
-
-    VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 1: " +
-                         util::arrayToString(direction));
-
-    orthoBasis1_ = rayInternal::getOrthonormalBasis(direction);
-
-    direction[0] = cosTilt * cage_y;
-    direction[1] = -cosTilt * cage_x;
-    direction[2] = -sinTilt;
-    if constexpr (D == 2)
-      std::swap(direction[1], direction[2]);
-
-    VIENNACORE_LOG_DEBUG("FaradayCageEtching: Source direction 2: " +
-                         util::arrayToString(direction));
-
-    orthoBasis2_ = rayInternal::getOrthonormalBasis(direction);
+    auto [direction1, direction2] =
+        getFaradayCageSourceDirections<NumericType, D>(tiltAngle, cageAngle);
+    orthoBasis1_ = rayInternal::getOrthonormalBasis(direction1);
+    orthoBasis2_ = rayInternal::getOrthonormalBasis(direction2);
   }
 
   std::array<Vec3D<NumericType>, 2>
@@ -186,17 +185,14 @@ class FaradayCageEtching final : public ProcessModelGPU<NumericType, D> {
                      const std::vector<Material> &maskMaterials)
       : params_(params), maskMaterials_(maskMaterials) {
 
-    NumericType cosTilt = std::cos(params.ibeParams.tiltAngle * M_PIf / 180.f);
-    NumericType sinTilt = std::sin(params.ibeParams.tiltAngle * M_PIf / 180.f);
-    NumericType cage_y = std::cos(params.cageAngle * M_PIf / 180.f);
-    NumericType cage_x = std::sin(params.cageAngle * M_PIf / 180.f);
-
+    auto [direction1, direction2] =
+        ::viennaps::impl::getFaradayCageSourceDirections<NumericType, D>(
+            params.ibeParams.tiltAngle, params.cageAngle);
     viennaray::gpu::Particle<NumericType> particle1{
         .name = "Ion1",
         .cosineExponent = params.ibeParams.exponent,
         .useCustomDirection = true,
-        .direction =
-            Vec3D<NumericType>{-cage_x * cosTilt, cage_y * cosTilt, -sinTilt}};
+        .direction = direction1};
     particle1.dataLabels.push_back(
         ::viennaps::impl::IBESurfaceModel<NumericType>::fluxLabel);
     if (params.ibeParams.redepositionRate > 0.) {
@@ -209,8 +205,7 @@ class FaradayCageEtching final : public ProcessModelGPU<NumericType, D> {
         .name = "Ion2",
         .cosineExponent = params.ibeParams.exponent,
         .useCustomDirection = true,
-        .direction =
-            Vec3D<NumericType>{cage_x * cosTilt, -cage_y * cosTilt, -sinTilt}};
+        .direction = direction2};
     // NO ADDITIONAL FLUX ARRAY NEEDED, ALL PARTICLES WRITE TO THE SAME
     this->insertNextParticleType(particle2);
 
diff --git a/include/viennaps/process/psCPUTriangleEngine.hpp b/include/viennaps/process/psCPUTriangleEngine.hpp
index 5f5a73ab..7a5bc47d 100644
--- a/include/viennaps/process/psCPUTriangleEngine.hpp
+++ b/include/viennaps/process/psCPUTriangleEngine.hpp
@@ -18,6 +18,8 @@ class CPUTriangleEngine final : public FluxEngine<NumericType, D> {
   using KDTreeType =
       SmartPointer<KDTree<NumericType, std::array<NumericType, 3>>>;
   using MeshType = SmartPointer<viennals::Mesh<float>>;
+  using PostProcessingType =
+      ElementToPointData<NumericType, float, NumericType, true, D == 3>;
 
 public:
   ProcessResult checkInput(ProcessContext<NumericType, D> &context) override {
@@ -27,6 +29,7 @@ class CPUTriangleEngine final : public FluxEngine<NumericType, D> {
       VIENNACORE_LOG_WARNING("Invalid process model.");
       return ProcessResult::INVALID_INPUT;
     }
+    model_ = model;
     return ProcessResult::SUCCESS;
   }
 
@@ -55,20 +58,17 @@ class CPUTriangleEngine final : public FluxEngine<NumericType, D> {
     if (!context.rayTracingParams.useRandomSeeds)
       rayTracer_.setRngSeed(context.rayTracingParams.rngSeed);
 
-    auto model = std::dynamic_pointer_cast<ProcessModelCPU<NumericType, D>>(
-        context.model);
-
-    if (auto source = model->getSource()) {
+    if (auto source = model_->getSource()) {
       rayTracer_.setSource(source);
       VIENNACORE_LOG_INFO("Using custom source.");
     }
-    if (auto primaryDirection = model->getPrimaryDirection()) {
+    if (auto primaryDirection = model_->getPrimaryDirection()) {
       VIENNACORE_LOG_INFO("Using primary direction: " +
                           util::arrayToString(primaryDirection.value()));
       rayTracer_.setPrimaryDirection(primaryDirection.value());
     }
 
-    model->initializeParticleDataLogs();
+    model_->initializeParticleDataLogs();
 
     return ProcessResult::SUCCESS;
   }
@@ -91,8 +91,8 @@ class CPUTriangleEngine final : public FluxEngine<NumericType, D> {
     if constexpr (D == 2) {
       viennaray::LineMesh lineMesh;
       lineMesh.gridDelta = static_cast<float>(context.domain->getGridDelta());
-      lineMesh.lines = surfaceMesh_->lines;
-      lineMesh.nodes = surfaceMesh_->nodes;
+      lineMesh.lines = std::move(surfaceMesh_->lines);
+      lineMesh.nodes = std::move(surfaceMesh_->nodes);
       lineMesh.minimumExtent = surfaceMesh_->minimumExtent;
       lineMesh.maximumExtent = surfaceMesh_->maximumExtent;
 
@@ -218,14 +218,11 @@ class CPUTriangleEngine final : public FluxEngine<NumericType, D> {
     assert(fluxes != nullptr);
     fluxes->clear();
 
-    auto model = std::dynamic_pointer_cast<ProcessModelCPU<NumericType, D>>(
-        context.model);
-
     std::vector<std::vector<NumericType>> elementFluxes;
 
     unsigned particleIdx = 0;
-    for (auto &particle : model->getParticleTypes()) {
-      int dataLogSize = model->getParticleLogSize(particleIdx);
+    for (auto &particle : model_->getParticleTypes()) {
+      int dataLogSize = model_->getParticleLogSize(particleIdx);
       if (dataLogSize > 0) {
         rayTracer_.getDataLog().data.resize(1);
         rayTracer_.getDataLog().data[0].resize(dataLogSize, 0.);
@@ -267,17 +264,18 @@ class CPUTriangleEngine final : public FluxEngine<NumericType, D> {
         elementFluxes.push_back(std::move(flux));
       }
 
-      model->mergeParticleData(rayTracer_.getDataLog(), particleIdx);
+      model_->mergeParticleData(rayTracer_.getDataLog(), particleIdx);
       ++particleIdx;
     }
 
-    // map fluxes on points
-    ElementToPointData<NumericType, float, NumericType, true, D == 3>(
-        IndexMap(model->getParticleTypes()), elementFluxes, fluxes,
-        elementKdTree_, context.diskMesh, surfaceMesh_,
+    // map fluxes to points
+    PostProcessingType postProcessing(
+        model_->getParticleDataLabels(), fluxes, elementKdTree_,
+        context.diskMesh, surfaceMesh_,
         context.domain->getGridDelta() *
-            (context.rayTracingParams.smoothingNeighbors + 1))
-        .apply();
+            (context.rayTracingParams.smoothingNeighbors + 1));
+    postProcessing.setElementDataArrays(std::move(elementFluxes));
+    postProcessing.apply();
   }
 
   static viennaray::TracingData<NumericType>
@@ -297,6 +295,7 @@ class CPUTriangleEngine final : public FluxEngine<NumericType, D> {
 
 private:
   viennaray::TraceTriangle<NumericType, D> rayTracer_;
+  SmartPointer<ProcessModelCPU<NumericType, D>> model_;
 
   KDTreeType elementKdTree_;
   MeshType surfaceMesh_;
diff --git a/include/viennaps/process/psGPUDiskEngine.hpp b/include/viennaps/process/psGPUDiskEngine.hpp
index e85fbfbe..d56418cb 100644
--- a/include/viennaps/process/psGPUDiskEngine.hpp
+++ b/include/viennaps/process/psGPUDiskEngine.hpp
@@ -41,31 +41,15 @@ class GPUDiskEngine final : public FluxEngine<NumericType, D> {
       return ProcessResult::INVALID_INPUT;
     }
 
+    model_ = model;
+
     return ProcessResult::SUCCESS;
   }
 
   ProcessResult initialize(ProcessContext<NumericType, D> &context) override {
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
     if (!rayTracerInitialized_) {
-      // Check for periodic boundary conditions
-      bool periodicBoundary = false;
-      if (context.rayTracingParams.ignoreFluxBoundaries) {
-        rayTracer_.setIgnoreBoundary(true);
-      } else {
-        const auto &grid = context.domain->getGrid();
-        for (unsigned i = 0; i < D; ++i) {
-          if (grid.getBoundaryConditions(i) ==
-              viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) {
-            periodicBoundary = true;
-            break;
-          }
-        }
-      }
-
-      rayTracer_.setParticleCallableMap(model->getParticleCallableMap());
-      rayTracer_.setCallables(model->getCallableFileName(),
+      rayTracer_.setParticleCallableMap(model_->getParticleCallableMap());
+      rayTracer_.setCallables(model_->getCallableFileName(),
                               deviceContext_->modulePath);
       rayTracer_.setNumberOfRaysPerPoint(context.rayTracingParams.raysPerPoint);
       rayTracer_.setMaxBoundaryHits(context.rayTracingParams.maxBoundaryHits);
@@ -74,15 +58,20 @@ class GPUDiskEngine final : public FluxEngine<NumericType, D> {
       rayTracer_.setUseRandomSeeds(context.rayTracingParams.useRandomSeeds);
       if (!context.rayTracingParams.useRandomSeeds)
         rayTracer_.setRngSeed(context.rayTracingParams.rngSeed);
-      rayTracer_.setPeriodicBoundary(periodicBoundary);
-      rayTracer_.setIgnoreBoundary(
-          context.rayTracingParams.ignoreFluxBoundaries);
-      for (auto &particle : model->getParticleTypes()) {
+      for (auto &particle : model_->getParticleTypes()) {
         rayTracer_.insertNextParticle(particle);
       }
+
+      // Check boundary conditions
+      if (context.rayTracingParams.ignoreFluxBoundaries) {
+        rayTracer_.setIgnoreBoundary(true);
+      } else if (context.flags.domainHasPeriodicBoundaries) {
+        rayTracer_.setPeriodicBoundary(true);
+      }
+
       rayTracer_.prepareParticlePrograms();
     }
-    rayTracer_.setParameters(model->getProcessDataDPtr());
+    rayTracer_.setParameters(model_->getProcessDataDPtr());
     rayTracerInitialized_ = true;
 
     return ProcessResult::SUCCESS;
@@ -135,10 +124,7 @@ class GPUDiskEngine final : public FluxEngine<NumericType, D> {
 
     rayTracer_.setGeometry(diskMeshRay);
 
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
-    if (model->useMaterialIds()) {
+    if (model_->useMaterialIds()) {
       auto const &materialIds =
           *diskMesh->getCellData().getScalarData("MaterialIds");
       rayTracer_.setMaterialIds(materialIds);
@@ -154,13 +140,10 @@ class GPUDiskEngine final : public FluxEngine<NumericType, D> {
       SmartPointer<viennals::PointData<NumericType>> &fluxes) override {
 
     this->timer_.start();
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
 
     CudaBuffer d_coverages; // device buffer for coverages
     if (context.flags.useCoverages) {
-      auto coverages = model->getSurfaceModel()->getCoverages();
+      auto coverages = model_->getSurfaceModel()->getCoverages();
       assert(coverages);
       assert(context.diskMesh);
       auto numCov = coverages->getScalarDataSize();
@@ -187,7 +170,7 @@ class GPUDiskEngine final : public FluxEngine<NumericType, D> {
     // output
     if (Logger::hasIntermediate()) {
       if (context.flags.useCoverages) {
-        auto coverages = model->getSurfaceModel()->getCoverages();
+        auto coverages = model_->getSurfaceModel()->getCoverages();
         downloadCoverages(d_coverages, context.diskMesh->getCellData(),
                           coverages, context.diskMesh->getNodes().size());
       }
@@ -254,6 +237,7 @@ class GPUDiskEngine final : public FluxEngine<NumericType, D> {
 private:
   std::shared_ptr<DeviceContext> deviceContext_;
   viennaray::gpu::TraceDisk<NumericType, D> rayTracer_;
+  SmartPointer<gpu::ProcessModelGPU<NumericType, D>> model_;
 
   bool rayTracerInitialized_ = false;
 };
diff --git a/include/viennaps/process/psGPULineEngine.hpp b/include/viennaps/process/psGPULineEngine.hpp
index 69fa0010..4eed6957 100644
--- a/include/viennaps/process/psGPULineEngine.hpp
+++ b/include/viennaps/process/psGPULineEngine.hpp
@@ -50,31 +50,15 @@ class GPULineEngine final : public FluxEngine<NumericType, D> {
       return ProcessResult::INVALID_INPUT;
     }
 
+    model_ = model;
+
     return ProcessResult::SUCCESS;
   }
 
   ProcessResult initialize(ProcessContext<NumericType, D> &context) override {
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
     if (!rayTracerInitialized_) {
-      // Check for periodic boundary conditions
-      bool periodicBoundary = false;
-      if (context.rayTracingParams.ignoreFluxBoundaries) {
-        rayTracer_.setIgnoreBoundary(true);
-      } else {
-        const auto &grid = context.domain->getGrid();
-        for (unsigned i = 0; i < D; ++i) {
-          if (grid.getBoundaryConditions(i) ==
-              viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) {
-            periodicBoundary = true;
-            break;
-          }
-        }
-      }
-
-      rayTracer_.setParticleCallableMap(model->getParticleCallableMap());
-      rayTracer_.setCallables(model->getCallableFileName(),
+      rayTracer_.setParticleCallableMap(model_->getParticleCallableMap());
+      rayTracer_.setCallables(model_->getCallableFileName(),
                               deviceContext_->modulePath);
       rayTracer_.setNumberOfRaysPerPoint(context.rayTracingParams.raysPerPoint);
       rayTracer_.setMaxBoundaryHits(context.rayTracingParams.maxBoundaryHits);
@@ -83,15 +67,20 @@ class GPULineEngine final : public FluxEngine<NumericType, D> {
       rayTracer_.setUseRandomSeeds(context.rayTracingParams.useRandomSeeds);
       if (!context.rayTracingParams.useRandomSeeds)
         rayTracer_.setRngSeed(context.rayTracingParams.rngSeed);
-      rayTracer_.setPeriodicBoundary(periodicBoundary);
-      rayTracer_.setIgnoreBoundary(
-          context.rayTracingParams.ignoreFluxBoundaries);
-      for (auto &particle : model->getParticleTypes()) {
+      for (auto &particle : model_->getParticleTypes()) {
         rayTracer_.insertNextParticle(particle);
       }
+
+      // Check boundary conditions
+      if (context.rayTracingParams.ignoreFluxBoundaries) {
+        rayTracer_.setIgnoreBoundary(true);
+      } else if (context.flags.domainHasPeriodicBoundaries) {
+        rayTracer_.setPeriodicBoundary(true);
+      }
+
       rayTracer_.prepareParticlePrograms();
     }
-    rayTracer_.setParameters(model->getProcessDataDPtr());
+    rayTracer_.setParameters(model_->getProcessDataDPtr());
     rayTracerInitialized_ = true;
 
     return ProcessResult::SUCCESS;
@@ -140,10 +129,7 @@ class GPULineEngine final : public FluxEngine<NumericType, D> {
     surfaceMesh_->minimumExtent = lineMesh.minimumExtent;
     surfaceMesh_->maximumExtent = lineMesh.maximumExtent;
 
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
-    if (model->useMaterialIds()) {
+    if (model_->useMaterialIds()) {
       auto const &pointMaterialIds =
           diskMesh->getCellData().getScalarData("MaterialIds");
       std::vector<int> lineMaterialIds(surfaceMesh_->lines.size());
@@ -174,9 +160,6 @@ class GPULineEngine final : public FluxEngine<NumericType, D> {
       ProcessContext<NumericType, D> &context,
       SmartPointer<viennals::PointData<NumericType>> &fluxes) override {
     this->timer_.start();
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
 
     std::vector<Vec3D<NumericType>> elementCenters(surfaceMesh_->lines.size());
     for (int i = 0; i < surfaceMesh_->lines.size(); ++i) {
@@ -194,7 +177,7 @@ class GPULineEngine final : public FluxEngine<NumericType, D> {
     auto diskMesh = *context.diskMesh;
     CudaBuffer d_coverages; // device buffer for coverages
     if (context.flags.useCoverages) {
-      auto coverages = model->getSurfaceModel()->getCoverages();
+      auto coverages = model_->getSurfaceModel()->getCoverages();
       assert(coverages);
       assert(context.diskMesh);
       auto numCov = coverages->getScalarDataSize();
@@ -238,7 +221,7 @@ class GPULineEngine final : public FluxEngine<NumericType, D> {
     // output
     if (Logger::hasIntermediate()) {
       if (context.flags.useCoverages) {
-        auto coverages = model->getSurfaceModel()->getCoverages();
+        auto coverages = model_->getSurfaceModel()->getCoverages();
         downloadCoverages(d_coverages, context.diskMesh->getCellData(),
                           coverages, context.diskMesh->getNodes().size());
       }
@@ -377,6 +360,7 @@ class GPULineEngine final : public FluxEngine<NumericType, D> {
 private:
   std::shared_ptr<DeviceContext> deviceContext_;
   viennaray::gpu::TraceLine<NumericType, D> rayTracer_;
+  SmartPointer<gpu::ProcessModelGPU<NumericType, D>> model_;
 
   KDTreeType elementKdTree_;
   MeshType surfaceMesh_;
diff --git a/include/viennaps/process/psGPUTriangleEngine.hpp b/include/viennaps/process/psGPUTriangleEngine.hpp
index be7dad1f..8fc095a6 100644
--- a/include/viennaps/process/psGPUTriangleEngine.hpp
+++ b/include/viennaps/process/psGPUTriangleEngine.hpp
@@ -15,6 +15,8 @@
 
 #include <gpu/raygTraceTriangle.hpp>
 
+#include <cassert>
+
 namespace viennaps {
 
 using namespace viennacore;
@@ -24,10 +26,16 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
   using KDTreeType =
       SmartPointer<KDTree<NumericType, std::array<NumericType, 3>>>;
   using MeshType = SmartPointer<viennals::Mesh<float>>;
+  using PostProcessingType =
+      ElementToPointData<NumericType, float, viennaray::gpu::ResultType, true,
+                         D == 3>;
 
 public:
   explicit GPUTriangleEngine(std::shared_ptr<DeviceContext> deviceContext)
-      : deviceContext_(deviceContext), rayTracer_(deviceContext) {}
+      : deviceContext_(deviceContext), rayTracer_(deviceContext) {
+    surfaceMesh_ = MeshType::New();
+    elementKdTree_ = KDTreeType::New();
+  }
 
   ProcessResult checkInput(ProcessContext<NumericType, D> &context) override {
 
@@ -50,31 +58,16 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
       return ProcessResult::INVALID_INPUT;
     }
 
+    model_ = model;
+
     return ProcessResult::SUCCESS;
   }
 
   ProcessResult initialize(ProcessContext<NumericType, D> &context) override {
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
+    assert(model_ && "Model not set.");
     if (!rayTracerInitialized_) {
-      // Check for periodic boundary conditions
-      bool periodicBoundary = false;
-      if (context.rayTracingParams.ignoreFluxBoundaries) {
-        rayTracer_.setIgnoreBoundary(true);
-      } else {
-        const auto &grid = context.domain->getGrid();
-        for (unsigned i = 0; i < D; ++i) {
-          if (grid.getBoundaryConditions(i) ==
-              viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) {
-            periodicBoundary = true;
-            break;
-          }
-        }
-      }
-
-      rayTracer_.setParticleCallableMap(model->getParticleCallableMap());
-      rayTracer_.setCallables(model->getCallableFileName(),
+      rayTracer_.setParticleCallableMap(model_->getParticleCallableMap());
+      rayTracer_.setCallables(model_->getCallableFileName(),
                               deviceContext_->modulePath);
       rayTracer_.setNumberOfRaysPerPoint(context.rayTracingParams.raysPerPoint);
       rayTracer_.setUseRandomSeeds(context.rayTracingParams.useRandomSeeds);
@@ -83,15 +76,20 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
         rayTracer_.setMaxReflections(context.rayTracingParams.maxReflections);
       if (!context.rayTracingParams.useRandomSeeds)
         rayTracer_.setRngSeed(context.rayTracingParams.rngSeed);
-      rayTracer_.setPeriodicBoundary(periodicBoundary);
-      rayTracer_.setIgnoreBoundary(
-          context.rayTracingParams.ignoreFluxBoundaries);
-      for (auto &particle : model->getParticleTypes()) {
+      for (auto &particle : model_->getParticleTypes()) {
         rayTracer_.insertNextParticle(particle);
       }
+
+      // Check boundary conditions
+      if (context.rayTracingParams.ignoreFluxBoundaries) {
+        rayTracer_.setIgnoreBoundary(true);
+      } else if (context.flags.domainHasPeriodicBoundaries) {
+        rayTracer_.setPeriodicBoundary(true);
+      }
+
       rayTracer_.prepareParticlePrograms();
     }
-    rayTracer_.setParameters(model->getProcessDataDPtr());
+    rayTracer_.setParameters(model_->getProcessDataDPtr());
     rayTracerInitialized_ = true;
 
     return ProcessResult::SUCCESS;
@@ -100,11 +98,9 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
   ProcessResult
   updateSurface(ProcessContext<NumericType, D> &context) override {
     this->timer_.start();
-    if (!surfaceMesh_)
-      surfaceMesh_ = MeshType::New();
-    if (!elementKdTree_)
-      elementKdTree_ = KDTreeType::New();
 
+    assert(surfaceMesh_ && "Surface mesh not initialized.");
+    assert(elementKdTree_ && "Element KDTree not initialized.");
     CreateSurfaceMesh<NumericType, float, D>(
         context.domain->getSurface(), surfaceMesh_, elementKdTree_, 1e-12,
         context.rayTracingParams.minNodeDistanceFactor)
@@ -116,8 +112,8 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
     if constexpr (D == 2) {
       viennaray::LineMesh lineMesh;
       lineMesh.gridDelta = static_cast<float>(context.domain->getGridDelta());
-      lineMesh.lines = surfaceMesh_->lines;
-      lineMesh.nodes = surfaceMesh_->nodes;
+      lineMesh.lines = std::move(surfaceMesh_->lines);
+      lineMesh.nodes = std::move(surfaceMesh_->nodes);
       lineMesh.minimumExtent = surfaceMesh_->minimumExtent;
       lineMesh.maximumExtent = surfaceMesh_->maximumExtent;
       assert(!lineMesh.lines.empty());
@@ -156,10 +152,7 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
       surfaceMesh_->maximumExtent = triangleMesh.maximumExtent;
     }
 
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
-    if (model->useMaterialIds()) {
+    if (model_->useMaterialIds()) {
       auto const &pointMaterialIds =
           *context.diskMesh->getCellData().getScalarData("MaterialIds");
       std::vector<int> elementMaterialIds;
@@ -190,13 +183,10 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
       SmartPointer<viennals::PointData<NumericType>> &fluxes) override {
 
     this->timer_.start();
-    auto model =
-        std::dynamic_pointer_cast<gpu::ProcessModelGPU<NumericType, D>>(
-            context.model);
 
     CudaBuffer d_coverages; // device buffer for coverages
     if (context.flags.useCoverages) {
-      auto coverages = model->getSurfaceModel()->getCoverages();
+      auto coverages = model_->getSurfaceModel()->getCoverages();
       assert(coverages);
       assert(context.diskMesh);
       assert(context.translationField);
@@ -217,22 +207,24 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
     }
 
     // run the ray tracer
-    rayTracer_.apply();
-    rayTracer_.normalizeResults();
-
-    // extract fluxes on points
-    ElementToPointData<NumericType, float, viennaray::gpu::ResultType, true,
-                       D == 3>(
-        IndexMap(rayTracer_.getParticles()), rayTracer_.getResults(), fluxes,
-        elementKdTree_, context.diskMesh, surfaceMesh_,
+    rayTracer_.apply(); // device detach point here
+
+    // Prepare post-processing
+    PostProcessingType postProcessing(
+        model_->getParticleDataLabels(), fluxes, elementKdTree_,
+        context.diskMesh, surfaceMesh_,
         context.domain->getGridDelta() *
-            (context.rayTracingParams.smoothingNeighbors + 1))
-        .apply();
+            (context.rayTracingParams.smoothingNeighbors + 1));
+    postProcessing.prepare();
+
+    rayTracer_.normalizeResults(); // device sync point here
+    postProcessing.setElementDataArrays(rayTracer_.getResults());
+    postProcessing.convert(); // run post-processing
 
     // output
     if (Logger::hasIntermediate()) {
       if (context.flags.useCoverages) {
-        auto coverages = model->getSurfaceModel()->getCoverages();
+        auto coverages = model_->getSurfaceModel()->getCoverages();
         downloadCoverages(d_coverages, surfaceMesh_->getCellData(), coverages,
                           surfaceMesh_->getElements<3>().size());
       }
@@ -299,6 +291,7 @@ class GPUTriangleEngine final : public FluxEngine<NumericType, D> {
 private:
   std::shared_ptr<DeviceContext> deviceContext_;
   viennaray::gpu::TraceTriangle<NumericType, D> rayTracer_;
+  SmartPointer<gpu::ProcessModelGPU<NumericType, D>> model_;
 
   KDTreeType elementKdTree_;
   MeshType surfaceMesh_;
diff --git a/include/viennaps/process/psProcess.hpp b/include/viennaps/process/psProcess.hpp
index b2039d4d..154e3886 100644
--- a/include/viennaps/process/psProcess.hpp
+++ b/include/viennaps/process/psProcess.hpp
@@ -86,13 +86,9 @@ template <typename NumericType, int D> class Process {
 
   void apply() {
 
-    if (!checkInput())
+    if (!checkInputUpdateContext())
       return;
 
-    // Update context with current state
-    context_.updateFlags();
-    context_.printFlags();
-
     // Find appropriate strategy
     auto strategy = findStrategy();
     if (!strategy) {
@@ -120,10 +116,9 @@ template <typename NumericType, int D> class Process {
   }
 
   SmartPointer<viennals::Mesh<NumericType>> calculateFlux() {
-    if (!checkInput())
+    if (!checkInputUpdateContext())
       return nullptr;
 
-    context_.updateFlags();
     const auto name = context_.getProcessName();
     if (!context_.flags.useFluxEngine) {
       VIENNACORE_LOG_ERROR("Process model '" + name +
@@ -267,7 +262,7 @@ template <typename NumericType, int D> class Process {
 #endif
 
 public:
-  bool checkInput() {
+  bool checkInputUpdateContext() {
     if (!context_.domain) {
       VIENNACORE_LOG_ERROR("No domain passed to Process.");
       return false;
@@ -283,10 +278,19 @@ template <typename NumericType, int D> class Process {
       return false;
     }
 
+    // Update context with current state
+    context_.updateFlags();
+    context_.printFlags();
+
     // Auto-select engine type
     if (fluxEngineType_ == FluxEngineType::AUTO) {
       if (gpuAvailable() && context_.model->hasGPUModel()) {
-        fluxEngineType_ = FluxEngineType::GPU_TRIANGLE;
+        // Prefer disks if boundary is periodic in any direction
+        if (context_.flags.domainHasPeriodicBoundaries) {
+          fluxEngineType_ = FluxEngineType::GPU_DISK;
+        } else {
+          fluxEngineType_ = FluxEngineType::GPU_TRIANGLE;
+        }
       } else {
         fluxEngineType_ = FluxEngineType::CPU_DISK;
       }
diff --git a/include/viennaps/process/psProcessContext.hpp b/include/viennaps/process/psProcessContext.hpp
index 3de17c7f..87144cae 100644
--- a/include/viennaps/process/psProcessContext.hpp
+++ b/include/viennaps/process/psProcessContext.hpp
@@ -48,9 +48,12 @@ template <typename NumericType, int D> struct ProcessContext {
     bool isALP = false;
     bool isAnalytic = false;
     bool isGeometric = false;
+    bool domainHasPeriodicBoundaries = false;
   } flags;
 
   void updateFlags() {
+    assert(model && "Process model must be set before updating flags.");
+    assert(domain && "Domain must be set before updating flags.");
     flags.isGeometric = model->getGeometricModel() != nullptr;
     flags.useFluxEngine = model->useFluxEngine();
     flags.useAdvectionCallback = model->getAdvectionCallback() != nullptr;
@@ -60,6 +63,15 @@ template <typename NumericType, int D> struct ProcessContext {
     flags.isAnalytic = model->getVelocityField() && !model->useFluxEngine();
     flags.isALP = model->isALPModel();
     flags.useCoverages = flags.isALP || flags.useCoverages;
+
+    const auto &grid = domain->getGrid();
+    for (unsigned i = 0; i < D; ++i) {
+      if (grid.getBoundaryConditions(i) ==
+          viennals::BoundaryConditionEnum::PERIODIC_BOUNDARY) {
+        flags.domainHasPeriodicBoundaries = true;
+        break;
+      }
+    }
   }
 
   void printFlags() const {
@@ -76,7 +88,9 @@ template <typename NumericType, int D> struct ProcessContext {
            << util::boolString(flags.useProcessParams)
            << "\n\tuseCoverages: " << util::boolString(flags.useCoverages)
            << "\n\tisALP: " << util::boolString(flags.isALP)
-           << "\n\tisAnalytic: " << util::boolString(flags.isAnalytic);
+           << "\n\tisAnalytic: " << util::boolString(flags.isAnalytic)
+           << "\n\tdomainHasPeriodicBoundaries: "
+           << util::boolString(flags.domainHasPeriodicBoundaries);
     if (model) {
       stream << "\nProcess Name: "
              << model->getProcessName().value_or("default");
diff --git a/include/viennaps/process/psProcessModel.hpp b/include/viennaps/process/psProcessModel.hpp
index e8b7a961..2ed6a471 100644
--- a/include/viennaps/process/psProcessModel.hpp
+++ b/include/viennaps/process/psProcessModel.hpp
@@ -156,6 +156,17 @@ class ProcessModelCPU : public ProcessModelBase<NumericType, D> {
   void setSource(SmartPointer<viennaray::Source<NumericType>> passedSource) {
     source = passedSource;
   }
+
+  auto getParticleDataLabels() const {
+    std::vector<std::string> dataLabels;
+    for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) {
+      auto labels = particles[pIdx]->getLocalDataLabels();
+      for (size_t dIdx = 0; dIdx < labels.size(); dIdx++) {
+        dataLabels.push_back(labels[dIdx]);
+      }
+    }
+    return dataLabels;
+  }
 };
 
 } // namespace viennaps
@@ -218,6 +229,16 @@ class ProcessModelGPU : public ProcessModelBase<NumericType, D> {
   setPrimaryDirection(const std::array<NumericType, 3> passedPrimaryDirection) {
     primaryDirection = Normalize(passedPrimaryDirection);
   }
+
+  auto getParticleDataLabels() const {
+    std::vector<std::string> dataLabels;
+    for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) {
+      for (size_t dIdx = 0; dIdx < particles[pIdx].dataLabels.size(); dIdx++) {
+        dataLabels.push_back(particles[pIdx].dataLabels[dIdx]);
+      }
+    }
+    return dataLabels;
+  }
 };
 
 } // namespace viennaps::gpu
diff --git a/include/viennaps/psElementToPointData.hpp b/include/viennaps/psElementToPointData.hpp
index ece162ce..68564665 100644
--- a/include/viennaps/psElementToPointData.hpp
+++ b/include/viennaps/psElementToPointData.hpp
@@ -13,95 +13,43 @@ namespace viennaps {
 
 using namespace viennacore;
 
-class IndexMap {
-  std::vector<std::string> dataLabels;
-
-public:
-  IndexMap() = default;
-
-#ifdef VIENNACORE_COMPILE_GPU
-  template <class T>
-  explicit IndexMap(const std::vector<viennaray::gpu::Particle<T>> &particles) {
-    for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) {
-      for (size_t dIdx = 0; dIdx < particles[pIdx].dataLabels.size(); dIdx++) {
-        dataLabels.push_back(particles[pIdx].dataLabels[dIdx]);
-      }
-    }
-  }
-#endif
-
-  template <class T>
-  explicit IndexMap(
-      const std::vector<std::unique_ptr<viennaray::AbstractParticle<T>>>
-          &particles) {
-    for (size_t pIdx = 0; pIdx < particles.size(); pIdx++) {
-      auto labels = particles[pIdx]->getLocalDataLabels();
-      for (size_t dIdx = 0; dIdx < labels.size(); dIdx++) {
-        dataLabels.push_back(labels[dIdx]);
-      }
-    }
-  }
-
-  void insertNextDataLabel(std::string dataLabel) {
-    dataLabels.push_back(std::move(dataLabel));
-  }
-
-  std::size_t getIndex(const std::string &label) const {
-    for (std::size_t idx = 0; idx < dataLabels.size(); idx++) {
-      if (dataLabels[idx] == label) {
-        return idx;
-      }
-    }
-    assert(false && "Data label not found");
-    return 0;
-  }
-
-  [[nodiscard]] const std::string &getLabel(std::size_t idx) const {
-    assert(idx < dataLabels.size());
-    return dataLabels[idx];
-  }
-
-  std::size_t getNumberOfData() const { return dataLabels.size(); }
-
-  std::vector<std::string>::const_iterator begin() const {
-    return dataLabels.cbegin();
-  }
-
-  std::vector<std::string>::const_iterator end() const {
-    return dataLabels.cend();
-  }
-};
-
 template <typename NumericType, typename MeshNT, typename ResultType,
           bool d2 = true, bool d4 = true>
 class ElementToPointData {
-  const IndexMap indexMap_;
-  std::vector<std::vector<ResultType>> const &elementDataArrays_;
+  const std::vector<std::string> dataLabels_;
   SmartPointer<viennals::PointData<NumericType>> pointData_;
   SmartPointer<KDTree<NumericType, Vec3D<NumericType>>> elementKdTree_;
   SmartPointer<viennals::Mesh<NumericType>> diskMesh_;
   SmartPointer<viennals::Mesh<MeshNT>> surfaceMesh_;
   const NumericType conversionRadius_;
 
+  std::vector<std::vector<ResultType>> elementDataArrays_;
+  std::vector<std::tuple<std::vector<size_t>, std::vector<double>, unsigned>>
+      closeElements_;
+
   static constexpr bool discard2 = d2;
   static constexpr bool discard4 = d4;
 
 public:
   ElementToPointData(
-      IndexMap indexMap,
-      std::vector<std::vector<ResultType>> const &elementDataArrays,
-      SmartPointer<viennals::PointData<NumericType>> pointData,
+      const std::vector<std::string> &dataLabels,
+      SmartPointer<viennals::PointData<NumericType>>
+          pointData, // target point data
       SmartPointer<KDTree<NumericType, Vec3D<NumericType>>> elementKdTree,
       SmartPointer<viennals::Mesh<NumericType>> diskMesh,
       SmartPointer<viennals::Mesh<MeshNT>> surfMesh,
       const NumericType conversionRadius)
-      : indexMap_(std::move(indexMap)), elementDataArrays_(elementDataArrays),
-        pointData_(pointData), elementKdTree_(elementKdTree),
-        diskMesh_(diskMesh), surfaceMesh_(surfMesh),
-        conversionRadius_(conversionRadius) {}
+      : dataLabels_(dataLabels), pointData_(pointData),
+        elementKdTree_(elementKdTree), diskMesh_(diskMesh),
+        surfaceMesh_(surfMesh), conversionRadius_(conversionRadius) {}
 
-  void apply() {
-    const auto numData = indexMap_.getNumberOfData();
+  void setElementDataArrays(
+      std::vector<std::vector<ResultType>> &&elementDataArrays) {
+    elementDataArrays_ = std::move(elementDataArrays);
+  }
+
+  void prepare() {
+    const auto numData = dataLabels_.size();
     const auto &points = diskMesh_->nodes;
     const auto numPoints = points.size();
     const auto numElements = elementKdTree_->getNumberOfPoints();
@@ -110,16 +58,12 @@ class ElementToPointData {
 
     // prepare point data container
     pointData_->clear();
-    for (const auto &label : indexMap_) {
+    for (const auto &label : dataLabels_) {
       std::vector<NumericType> data(numPoints, 0.);
       pointData_->insertNextScalarData(std::move(data), label);
     }
 
-    if (elementDataArrays_.size() != numData) {
-      VIENNACORE_LOG_ERROR(
-          "ElementToPointData: "
-          "Number of data arrays does not match expected count.");
-    }
+    closeElements_.resize(numPoints);
 
 #pragma omp parallel for schedule(static)
     for (unsigned i = 0; i < numPoints; i++) {
@@ -130,6 +74,7 @@ class ElementToPointData {
               .value();
 
       std::vector<double> weights(closeElements.size(), 0.);
+      std::vector<size_t> elementIndices(closeElements.size(), 0);
 
       // compute weights based on normal alignment
       unsigned numClosePoints = 0;
@@ -142,47 +87,57 @@ class ElementToPointData {
 
         if (weight > 1e-6 && !std::isnan(weight)) {
           weights[n] = weight;
+          elementIndices[n] = p.first;
           ++numClosePoints;
         }
       }
 
-      std::size_t nearestIdx = 0;
-      if (numClosePoints == 0) { // fallback to nearest point
-        auto nearestPoint = elementKdTree_->findNearest(points[i]);
-        nearestIdx = nearestPoint->first;
-      }
+      assert(!weights.empty() && !elementIndices.empty());
+      closeElements_[i] =
+          std::make_tuple(elementIndices, weights, numClosePoints);
+    }
+  }
+
+  void convert() {
+    const auto numData = dataLabels_.size();
+    const auto &points = diskMesh_->nodes;
+    const auto numPoints = points.size();
+
+    if (elementDataArrays_.size() != numData) {
+      VIENNACORE_LOG_ERROR(
+          "ElementToPointData: "
+          "Number of data arrays does not match expected count.");
+    }
+
+#pragma omp parallel for schedule(static)
+    for (unsigned i = 0; i < numPoints; i++) {
+
+      const auto &[closeElements, weights, numClosePoints] = closeElements_[i];
 
       for (unsigned j = 0; j < numData; ++j) {
         NumericType value = NumericType(0);
         const auto &elementData = elementDataArrays_[j];
         auto pointData = pointData_->getScalarData(j);
 
-        if (numClosePoints > 0) {
-
-          // Discard outlier values if enabled
-          const auto weightsCopy = discardOutliers(weights, closeElements,
-                                                   elementData, numClosePoints);
+        // Discard outlier values if enabled
+        const auto weightsCopy = discardOutliers(weights, closeElements,
+                                                 elementData, numClosePoints);
 
-          // Compute weighted average
-          const double sum =
-              std::accumulate(weightsCopy.cbegin(), weightsCopy.cend(), 0.0);
+        // Compute weighted average
+        const double sum =
+            std::accumulate(weightsCopy.cbegin(), weightsCopy.cend(), 0.0);
 
-          if (sum > 1e-6) {
-            for (std::size_t k = 0; k < closeElements.size(); ++k) {
-              if (weightsCopy[k] > 0.0) {
-                value += weightsCopy[k] * elementData[closeElements[k].first];
-              }
+        if (sum > 1e-6) {
+          for (size_t k = 0; k < closeElements.size(); ++k) {
+            if (weightsCopy[k] > 0.0) {
+              value += weightsCopy[k] * elementData[closeElements[k]];
             }
-            value /= sum;
-          } else {
-            // Fallback if all weights were discarded
-            auto nearestPoint = elementKdTree_->findNearest(points[i]);
-            value = elementData[nearestPoint->first];
           }
+          value /= sum;
         } else {
-          // Fallback to nearest point
-          assert(numClosePoints == 0);
-          value = elementData[nearestIdx];
+          // Fallback if all weights were discarded
+          auto nearestPoint = elementKdTree_->findNearest(points[i]);
+          value = elementData[nearestPoint->first];
         }
 
         pointData->at(i) = value;
@@ -190,6 +145,11 @@ class ElementToPointData {
     }
   }
 
+  void apply() {
+    prepare();
+    convert();
+  }
+
 private:
   // Helper function to find min/max values and their indices
   struct MinMaxInfo {
@@ -202,15 +162,15 @@ class ElementToPointData {
           minIdx1(-1), minIdx2(-1), maxIdx1(-1), maxIdx2(-1) {}
   };
 
-  static MinMaxInfo findMinMaxValues(
-      const std::vector<double> &weights,
-      const std::vector<std::pair<std::size_t, NumericType>> &closePoints,
-      const std::vector<ResultType> &elementData) {
+  static MinMaxInfo
+  findMinMaxValues(const std::vector<double> &weights,
+                   const std::vector<size_t> &closePoints,
+                   const std::vector<ResultType> &elementData) {
     MinMaxInfo info;
 
     for (std::size_t k = 0; k < closePoints.size(); ++k) {
       if (weights[k] > 0.0) {
-        const auto value = elementData[closePoints[k].first];
+        const auto value = elementData[closePoints[k]];
 
         // Update min values
         if (value < info.min1) {
@@ -239,10 +199,10 @@ class ElementToPointData {
     return info;
   }
 
-  static auto discardOutliers(
-      const std::vector<double> &weights,
-      const std::vector<std::pair<std::size_t, NumericType>> &closePoints,
-      const std::vector<ResultType> &elementData, unsigned numClosePoints) {
+  static auto discardOutliers(const std::vector<double> &weights,
+                              const std::vector<size_t> &closePoints,
+                              const std::vector<ResultType> &elementData,
+                              unsigned numClosePoints) {
 
     // copy weights to modify
     auto weightsCopy = weights;
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 53fca00e..4ad51788 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -86,10 +86,14 @@ if(VIENNAPS_USE_GPU)
   add_dependencies(${VIENNAPS_PYTHON_MODULE_NAME} ${VIENNAPS_GPU_DEPENDENCIES})
 
   # Set PTX path override for runtime kernel loading from site-packages/<pkg>/ptx.
-  # ABSOLUTE path enforced with dynamic module path.
-  target_compile_definitions(
-    ${VIENNAPS_PYTHON_MODULE_NAME} PRIVATE VIENNACORE_KERNELS_PATH_OVERRIDE=ptx
-                                           VIENNACORE_DYNAMIC_MODULE_PATH=1)
+  # ABSOLUTE path enforced with dynamic module path. Only works on Linux based systems.
+  # On Windows the dynamic path is relative to the python3xx.dll location which is not ideal.
+  # So the PTX files are used from the build directory on Windows.
+  if(NOT MSVC)
+    target_compile_definitions(
+      ${VIENNAPS_PYTHON_MODULE_NAME} PRIVATE VIENNACORE_KERNELS_PATH_OVERRIDE=ptx
+                                             VIENNACORE_DYNAMIC_MODULE_PATH=1)
+  endif()
 
   # Install PTX files
   install(DIRECTORY "${VIENNACORE_NVCC_PTX_DIR}/" DESTINATION "${VIENNAPS_PYTHON_PACKAGE_NAME}/ptx")
diff --git a/python/pyWrap.cpp b/python/pyWrap.cpp
index 67bdafd0..47328e16 100644
--- a/python/pyWrap.cpp
+++ b/python/pyWrap.cpp
@@ -19,11 +19,12 @@ struct MaterialInfoPy {
 
 PYBIND11_MODULE(VIENNAPS_MODULE_NAME, module) {
   module.doc() =
-      "ViennaPS is a header-only C++ process simulation library which "
-      "includes surface and volume representations, a ray tracer, and physical "
-      "models for the simulation of microelectronic fabrication processes. The "
-      "main design goals are simplicity and efficiency, tailored towards "
-      "scientific simulations.";
+      "ViennaPS is a topography simulation library for microelectronic "
+      "fabrication processes. It models the evolution of 2D and 3D surfaces "
+      "during etching, deposition, and related steps, combining advanced "
+      "level-set methods for surface evolution with Monte Carlo ray tracing "
+      "for flux calculation. This allows accurate, feature-scale simulation of "
+      "complex fabrication geometries.";
 
   // set version string of python module
   module.attr("__version__") = versionString();
diff --git a/python/scripts/install_ViennaPS_linux.py b/python/scripts/install_ViennaPS.py
similarity index 54%
rename from python/scripts/install_ViennaPS_linux.py
rename to python/scripts/install_ViennaPS.py
index 36b2010e..67f740bc 100644
--- a/python/scripts/install_ViennaPS_linux.py
+++ b/python/scripts/install_ViennaPS.py
@@ -1,37 +1,46 @@
 #!/usr/bin/env python3
 import argparse
 import os
+import platform
 import shutil
 import subprocess
 import sys
 import tempfile
 from pathlib import Path
 
-REQUIRED_GCC = "12"
 REQUIRED_NVCC_MAJOR = 12
 DEFAULT_VIENNALS_VERSION = "5.4.0"
 
+# Detect OS
+IS_WINDOWS = sys.platform == "win32" or os.name == "nt"
+IS_LINUX = sys.platform.startswith("linux")
+OS_NAME = platform.system()
+
+# Global variable to store required GCC version (determined at runtime)
+REQUIRED_GCC = None
+
 
 def run(cmd, **kwargs):
-    print("+", " ".join(cmd))
+    print("+", " ".join(str(c) for c in cmd))
     return subprocess.run(cmd, check=True, **kwargs)
 
 
 def run_capture(cmd, **kwargs):
-    print("+", " ".join(cmd))
+    print("+", " ".join(str(c) for c in cmd))
     return subprocess.run(
         cmd, check=True, stdout=subprocess.PIPE, text=True, **kwargs
     ).stdout.strip()
 
 
-def which_or_fail(name):
+def which_or_fail(name: str) -> str:
     p = shutil.which(name)
     if not p:
-        sys.exit(f"{name} is required but not found in PATH.")
+        sys.exit(f"{name} is required but was not found in PATH.")
     return p
 
 
 def parse_nvcc_version():
+    """Parse nvcc version and return (major, minor, full_string)."""
     out = run_capture(["nvcc", "--version"])
     # look for "release 12.3" etc.
     for line in out.splitlines():
@@ -39,29 +48,155 @@ def parse_nvcc_version():
             part = line.split("release", 1)[1].strip().split(",")[0].strip()
             # part like "12.3" or "12.0"
             try:
-                major = int(part.split(".")[0])
-                return major, part
+                version_parts = part.split(".")
+                major = int(version_parts[0])
+                minor = int(version_parts[1]) if len(version_parts) > 1 else 0
+                return major, minor, part
             except Exception:
                 break
     sys.exit("Could not parse nvcc version. Need CUDA >= 12.0.")
 
 
-def ensure_git():
-    which_or_fail("git")
+def determine_required_gcc_version(
+    nvcc_major: int, nvcc_minor: int
+) -> list[str] | None:
+    """
+    Determine the acceptable GCC versions based on nvcc version.
+
+    Rules:
+    - nvcc < 12.4: gcc 11 or 12
+    - 12.4 <= nvcc < 12.8: gcc 11, 12, or 13
+    - nvcc >= 12.8: any gcc version is ok (return None)
+
+    Returns:
+        List of acceptable GCC versions (e.g., ["11", "12"]) or None if any version is ok.
+    """
+    nvcc_version = nvcc_major * 10 + nvcc_minor
+
+    if nvcc_version < 124:
+        return ["11", "12"]
+    elif nvcc_version < 128:
+        return ["11", "12", "13"]
+    else:
+        return None  # Any GCC version is ok
+
+
+def get_default_gcc_version() -> tuple[int, int] | None:
+    """
+    Get the default gcc version (major, minor).
+    Returns None if gcc is not found or version cannot be parsed.
+    """
+    gcc_path = shutil.which("gcc")
+    if not gcc_path:
+        return None
+
+    try:
+        out = run_capture(["gcc", "--version"])
+        # First line typically contains version, e.g., "gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0"
+        first_line = out.splitlines()[0]
+        # Look for version pattern like "11.4.0"
+        import re
+
+        match = re.search(r"(\d+)\.(\d+)", first_line)
+        if match:
+            major = int(match.group(1))
+            minor = int(match.group(2))
+            return major, minor
+    except Exception:
+        pass
+
+    return None
 
 
 def ensure_compilers():
-    which_or_fail(f"gcc-{REQUIRED_GCC}")
-    which_or_fail(f"g++-{REQUIRED_GCC}")
+    """
+    Check for required compilers based on OS.
+    """
+    if IS_WINDOWS:
+        ensure_msvc()
+    else:
+        ensure_gcc_and_gpp()
+
+
+def ensure_gcc_and_gpp():
+    """
+    Check that GCC and G++ are available (Linux).
+    If specific versions are acceptable based on nvcc, ensure at least one is available.
+    """
+    global REQUIRED_GCC
+
+    if REQUIRED_GCC is None:
+        # No specific version required, just check that gcc exists
+        which_or_fail("gcc")
+        which_or_fail("g++")
+        default_gcc = get_default_gcc_version()
+        if default_gcc:
+            print(f"Using default GCC version: {default_gcc[0]}.{default_gcc[1]}")
+        else:
+            print("Using default GCC version (version detection failed)")
+    else:
+        # Check if any of the acceptable versions is available
+        found_version = None
+        for gcc_ver in REQUIRED_GCC:
+            if shutil.which(f"gcc-{gcc_ver}") and shutil.which(f"g++-{gcc_ver}"):
+                found_version = gcc_ver
+                break
+
+        if found_version is None:
+            # None of the versioned compilers found, check if default gcc is compatible
+            default_gcc = get_default_gcc_version()
+            if default_gcc and str(default_gcc[0]) in REQUIRED_GCC:
+                found_version = str(default_gcc[0])
+                print(
+                    f"Using default GCC-{found_version} (compatible with CUDA version)"
+                )
+            else:
+                sys.exit(
+                    f"ERROR: None of the required GCC versions {REQUIRED_GCC} found.\n"
+                    f"Please install one of: "
+                    + ", ".join([f"gcc-{v}/g++-{v}" for v in REQUIRED_GCC])
+                )
+        else:
+            print(f"Using GCC-{found_version} (compatible with CUDA version)")
+            # Update REQUIRED_GCC to the single version we're using
+            REQUIRED_GCC = found_version
+
+
+def ensure_msvc():
+    """
+    Check that an MSVC toolchain is available (Windows).
+    """
+    print("Checking MSVC toolchain...")
+    cl_path = shutil.which("cl")
+    if not cl_path:
+        print(
+            "WARNING: 'cl.exe' was not found in PATH.\n"
+            "Please run this script from a Visual Studio Developer Command Prompt\n"
+            "(e.g. 'x64 Native Tools Command Prompt for VS 2022')."
+        )
+        # Do not hard-fail here; building will likely fail later anyway.
+    else:
+        print(f"Found MSVC compiler: {cl_path}")
 
 
 def ensure_cuda():
+    """Check CUDA availability and determine required GCC version on Linux."""
+    global REQUIRED_GCC
+
     which_or_fail("nvcc")
-    major, full = parse_nvcc_version()
+    major, minor, full = parse_nvcc_version()
     if major < REQUIRED_NVCC_MAJOR:
         sys.exit(f"CUDA toolkit version 12.0 or higher is required (found {full}).")
     print(f"CUDA toolkit version: {full}")
 
+    # On Linux, determine required GCC version based on nvcc version
+    if IS_LINUX:
+        REQUIRED_GCC = determine_required_gcc_version(major, minor)
+        if REQUIRED_GCC:
+            print(f"CUDA {full} is compatible with GCC: {', '.join(REQUIRED_GCC)}")
+        else:
+            print(f"CUDA {full} works with any GCC version")
+
 
 def create_or_reuse_venv(venv_dir: Path):
     if venv_dir.exists():
@@ -93,14 +228,17 @@ def install_viennals(
     pip_path: Path, viennals_dir: Path | None, required_version: str, verbose: bool
 ):
     env = os.environ.copy()
-    env["CC"] = f"gcc-{REQUIRED_GCC}"
-    env["CXX"] = f"g++-{REQUIRED_GCC}"
+
+    # On Linux, set CC and CXX environment variables
+    if IS_LINUX and REQUIRED_GCC is not None:
+        env["CC"] = f"gcc-{REQUIRED_GCC}"
+        env["CXX"] = f"g++-{REQUIRED_GCC}"
 
     current = pip_show_version(pip_path, "ViennaLS")
     if current is None:
         print("ViennaLS not installed. A local build is required.")
         if viennals_dir is None:
-            ensure_git()
+            which_or_fail("git")  # git is required to clone
             print(f"Cloning ViennaLS v{required_version}…")
             with tempfile.TemporaryDirectory(prefix="ViennaLS_tmp_install_") as tmp:
                 tmp_path = Path(tmp)
@@ -129,12 +267,12 @@ def install_viennals(
             run(cmd, cwd=viennals_dir, env=env)
     else:
         print(
-            f"ViennaLS already installed ({current}). Local build is required and version should be {required_version}."
+            f"ViennaLS already installed ({current}). "
+            f"A local build is required and the version should be {required_version}."
         )
         if current != required_version:
-            sys.exit(
-                f"Version mismatch. Please change to the required version {required_version}.\n"
-                "Then re-run this script."
+            print(
+                "WARNING: Installed ViennaLS version does not match the required version."
             )
         print("Proceeding with the currently installed ViennaLS.")
 
@@ -168,7 +306,6 @@ def get_viennaps_dir(viennaps_dir_arg: str | None) -> Path:
 def install_viennaps(
     pip_path: Path,
     viennaps_dir: Path,
-    optix_dir: Path | None,
     debug_build: bool,
     gpu_build: bool,
     verbose: bool,
@@ -179,36 +316,43 @@ def install_viennaps(
         sys.exit(
             f"{viennaps_dir} does not look like a ViennaPS source directory (missing CMakeLists.txt)."
         )
+
     env = os.environ.copy()
-    env["CC"] = f"gcc-{REQUIRED_GCC}"
-    env["CXX"] = f"g++-{REQUIRED_GCC}"
 
-    # GPU on
-    cmake_args = []
-    if gpu_build:
-        cmake_args = ["-DVIENNAPS_USE_GPU=ON"]
+    # On Linux, set CC and CXX environment variables
+    if IS_LINUX and REQUIRED_GCC is not None:
+        env["CC"] = f"gcc-{REQUIRED_GCC}"
+        env["CXX"] = f"g++-{REQUIRED_GCC}"
 
-    if debug_build:
-        print("Enabling debug build.")
-        env["CMAKE_BUILD_TYPE"] = "Debug"
-        env["SKBUILD_CMAKE_BUILD_TYPE"] = "Debug"
+    cmake_args: list[str] = []
 
-    if optix_dir is not None:
-        env["OptiX_INSTALL_DIR"] = str(optix_dir)
+    # GPU on/off
+    if gpu_build:
+        cmake_args.append("-DVIENNAPS_USE_GPU=ON")
+    else:
+        cmake_args.append("-DVIENNAPS_USE_GPU=OFF")
 
     env["CMAKE_ARGS"] = " ".join(cmake_args)
+
     cmd = [str(pip_path), "install", "--no-deps", "."]
+
+    if debug_build:
+        print("Enabling debug build.")
+        cmd.append("--config-settings=cmake.build-type=Debug")
+
     if verbose:
         cmd.append("-v")
+
+    # Run the installation
     run(cmd, cwd=viennaps_dir, env=env)
 
 
 def main():
     parser = argparse.ArgumentParser(
-        description="Dev setup for ViennaPS with GPU support (Linux)."
+        description=f"Dev setup for ViennaPS with GPU support ({OS_NAME})."
     )
     parser.add_argument(
-        "--verbose", action="store_true", help="Enable verbose pip builds."
+        "--verbose", "-v", action="store_true", help="Enable verbose pip builds."
     )
     parser.add_argument(
         "--venv", default=".venv", help="Path to the virtual environment directory."
@@ -228,11 +372,6 @@ def main():
         default=None,
         help="Path to ViennaPS directory (defaults to current dir if it's ViennaPS).",
     )
-    parser.add_argument(
-        "--optix",
-        default=None,
-        help="Path to OptiX installation directory (optional - will auto-download OptiX headers if not provided).",
-    )
     parser.add_argument(
         "--debug-build",
         action="store_true",
@@ -252,31 +391,8 @@ def main():
 
     if not args.skip_toolchain_check or not args.no_gpu:
         print("Checking toolchain...")
-        ensure_compilers()
         ensure_cuda()
-
-    # OptiX dir
-    if not args.no_gpu:
-        optix_dir = args.optix or os.environ.get("OptiX_INSTALL_DIR")
-        if not optix_dir:
-            print("No OptiX directory provided. Will auto-download OptiX headers.")
-            print("\nWARNING: OptiX uses a different license than ViennaPS.")
-            print(
-                "By proceeding with auto-download, you agree to the NVIDIA OptiX license terms."
-            )
-            print(
-                "Please review the OptiX license at: https://developer.nvidia.com/designworks/sdk-samples-tools-software-license-agreement"
-            )
-            print("If you do not agree, abort now (Ctrl+C).")
-            input("Press Enter to continue...")
-            optix_dir = None
-        else:
-            optix_dir = Path(optix_dir).expanduser().resolve()
-            if not optix_dir.exists():
-                sys.exit(f"OptiX directory not found: {optix_dir}")
-            print(f"Using OptiX at: {optix_dir}")
-    else:
-        optix_dir = None
+        ensure_compilers()
 
     # venv
     venv_dir = Path(args.venv).expanduser().resolve()
@@ -289,14 +405,11 @@ def main():
     )
     install_viennals(venv_pip, viennals_dir, args.viennals_version, args.verbose)
 
-    # ViennaPS dir
+    # ViennaPS
     viennaps_dir = get_viennaps_dir(args.viennaps_dir)
-
-    # ViennaPS install
     install_viennaps(
         venv_pip,
         viennaps_dir,
-        optix_dir,
         args.debug_build,
         not args.no_gpu,
         args.verbose,