C2SM · starkphi · Sep 4, 2025 · Sep 4, 2025 · Sep 5, 2025 · Sep 5, 2025
diff --git a/ci/benchmark_bencher.yml b/ci/benchmark_bencher.yml
@@ -17,7 +17,7 @@ include:
   stage: benchmark
   script:
     - !reference [.bencher_setup_env, setup]
-    - nox -s __bencher_feature_branch_CI-3.10 -- --backend=$BACKEND --grid=$GRID
+    - nox -s __bencher_feature_branch_CI-3.10 -- --backend=$BACKEND --grid=$GRID -k "compile_time_domain"
   parallel:
     matrix:
       - BACKEND: [dace_cpu, dace_gpu, gtfn_cpu, gtfn_gpu]

diff --git a/ci/default.yml b/ci/default.yml
@@ -62,3 +62,47 @@ test_tools_datatests_aarch64:
 #   extends: [.test_model_datatests, .test_template_x86_64]
 test_model_datatests_aarch64:
   extends: [.test_model_datatests, .test_template_aarch64]
+
+
+.test_single_precision:
+  stage: test
+  allow_failure: true  # a failing job is reported but does not fail the pipeline
+  script:
+    - nox -s "__test_file-3.10" -- --testfile=model/atmosphere/dycore/tests/dycore/integration_tests/test_velocity_advection.py --backend=$BACKEND -k "test_compute_advection_in_horizontal_momentum_equation or test_compute_advection_in_vertical_momentum_equation"
+  variables:
+    FLOAT_PRECISION: single  # presumably read by the test setup to select single precision - TODO confirm
+    SLURM_TIMELIMIT: '00:20:00'
+  parallel:
+    matrix:
+      - BACKEND: [embedded, dace_gpu, gtfn_cpu, gtfn_gpu]  # one job per backend
+
+test_single_precision_aarch64:
+  extends: [.test_single_precision, .test_template_aarch64]  # concrete job built from the hidden template above
+
+# TODO(pstark): remove one version of the single precision tests
+.test_single_precision_v2:  # hidden template; no job visible in this diff extends it
+  stage: test
+  script:
+    - nox -s "test_model-3.10(datatest, $COMPONENT)" -- --single-precision --backend=$BACKEND --level=$LEVEL
+  rules:  # evaluated top to bottom; the first matching rule applies
+    - if: $BACKEND == 'dace_gpu' && $COMPONENT != 'dycore'  # cannot match while the matrix below only lists 'dycore'
+      when: never  # run only in daily CI, to save compute resources
+    - if: $COMPONENT == 'common' && $LEVEL == 'integration'  # cannot match while the matrix below only lists 'dycore'
+      variables:
+        NUM_PROCESSES: 1
+        SLURM_TIMELIMIT: '00:45:00'
+    - if: $BACKEND == 'dace_gpu'
+      variables:
+        NUM_PROCESSES: 8
+        SLURM_TIMELIMIT: '01:00:00'
+    - if: $BACKEND == 'embedded'
+      variables:
+        SLURM_TIMELIMIT: '00:15:00'
+    - when: on_success  # fallback for every combination not matched above
+      variables:
+        SLURM_TIMELIMIT: '00:30:00'
+  parallel:
+    matrix:
+      - COMPONENT: [dycore]  # single component, so 4 jobs in total (one per backend)
+        BACKEND: [embedded, dace_gpu, gtfn_cpu, gtfn_gpu]
+        LEVEL: [integration]
diff --git a/ci/docker/base.Dockerfile b/ci/docker/base.Dockerfile
@@ -37,6 +37,10 @@ ENV PATH="/root/.cargo/bin:${PATH}"
 RUN rustc --version && which rustc && cargo --version && which cargo
 
 # Install Bencher for performance monitoring
+# Update the date in the following comment to trigger a rebuild that updates the CLI:
+# last update: 2025-09-05
+# This is necessary because the cloud version and the CLI version have to match,
+# but a new CLI release does not change this file, so it does not register in the Dockerfile hash.
 RUN curl --proto '=https' --tlsv1.2 -sSfL https://bencher.dev/download/install-cli.sh | sh
 RUN bencher --version && which bencher
 

diff --git a/model/atmosphere/advection/src/icon4py/model/atmosphere/advection/advection_horizontal.py b/model/atmosphere/advection/src/icon4py/model/atmosphere/advection/advection_horizontal.py
@@ -127,7 +127,7 @@ def apply_flux_limiter(
             p_mflx_tracer_h=p_mflx_tracer_h,
             r_m=self._r_m,
             p_dtime=dtime,
-            dbl_eps=constants.DBL_EPS,
+            wp_eps=constants.WP_EPS,
             horizontal_start=self._start_cell_lateral_boundary_level_2,  # originally i_rlstart_c = get_startrow_c(startrow_e=5) = 2
             horizontal_end=self._end_cell_local,
             vertical_start=0,

diff --git a/model/atmosphere/advection/src/icon4py/model/atmosphere/advection/advection_vertical.py b/model/atmosphere/advection/src/icon4py/model/atmosphere/advection/advection_vertical.py
@@ -785,7 +785,7 @@ def _compute_numerical_flux(
             k=self._k_field,
             slevp1_ti=self._slevp1_ti,
             nlev=self._nlev,
-            dbl_eps=constants.DBL_EPS,
+            wp_eps=constants.WP_EPS,
             p_dtime=dtime,
             horizontal_start=horizontal_start,
             horizontal_end=horizontal_end,

diff --git a/.../atmosphere/advection/stencils/compute_monotone_horizontal_multiplicative_flux_factors.py b/.../atmosphere/advection/stencils/compute_monotone_horizontal_multiplicative_flux_factors.py
@@ -40,10 +40,10 @@ def _compute_monotone_horizontal_multiplicative_flux_factors_p_m(
     z_tracer_new_low: fa.CellKField[ta.wpfloat],
     z_max: fa.CellKField[ta.vpfloat],
     z_min: fa.CellKField[ta.vpfloat],
-    dbl_eps: ta.wpfloat,
+    wp_eps: ta.wpfloat,
 ) -> tuple[fa.CellKField[ta.wpfloat], fa.CellKField[ta.wpfloat]]:
-    r_p = (astype(z_max, wpfloat) - z_tracer_new_low) / (astype(z_mflx_anti_in, wpfloat) + dbl_eps)
-    r_m = (z_tracer_new_low - astype(z_min, wpfloat)) / (astype(z_mflx_anti_out, wpfloat) + dbl_eps)
+    r_p = (astype(z_max, wpfloat) - z_tracer_new_low) / (astype(z_mflx_anti_in, wpfloat) + wp_eps)
+    r_m = (z_tracer_new_low - astype(z_min, wpfloat)) / (astype(z_mflx_anti_out, wpfloat) + wp_eps)
 
     return r_p, r_m
 
@@ -57,7 +57,7 @@ def _compute_monotone_horizontal_multiplicative_flux_factors(
     z_tracer_new_low: fa.CellKField[ta.wpfloat],
     beta_fct: ta.wpfloat,
     r_beta_fct: ta.wpfloat,
-    dbl_eps: ta.wpfloat,
+    wp_eps: ta.wpfloat,
 ) -> tuple[fa.CellKField[ta.wpfloat], fa.CellKField[ta.wpfloat]]:
     z_max, z_min = _compute_monotone_horizontal_multiplicative_flux_factors_min_max(
         z_tracer_max, z_tracer_min, beta_fct, r_beta_fct
@@ -69,7 +69,7 @@ def _compute_monotone_horizontal_multiplicative_flux_factors(
         z_tracer_new_low,
         z_max,
         z_min,
-        dbl_eps,
+        wp_eps,
     )
     return r_p, r_m
 
@@ -85,7 +85,7 @@ def compute_monotone_horizontal_multiplicative_flux_factors(
     r_m: fa.CellKField[ta.wpfloat],
     beta_fct: ta.wpfloat,
     r_beta_fct: ta.wpfloat,
-    dbl_eps: ta.wpfloat,
+    wp_eps: ta.wpfloat,
     horizontal_start: gtx.int32,
     horizontal_end: gtx.int32,
     vertical_start: gtx.int32,
@@ -99,7 +99,7 @@ def compute_monotone_horizontal_multiplicative_flux_factors(
         z_tracer_new_low,
         beta_fct,
         r_beta_fct,
-        dbl_eps,
+        wp_eps,
         out=(r_p, r_m),
         domain={
             dims.CellDim: (horizontal_start, horizontal_end),

diff --git a/...ere/advection/stencils/compute_positive_definite_horizontal_multiplicative_flux_factor.py b/...ere/advection/stencils/compute_positive_definite_horizontal_multiplicative_flux_factor.py
@@ -20,10 +20,10 @@ def _compute_positive_definite_horizontal_multiplicative_flux_factor(
     p_rhodz_now: fa.CellKField[ta.wpfloat],
     p_mflx_tracer_h: fa.EdgeKField[ta.wpfloat],
     p_dtime: ta.wpfloat,
-    dbl_eps: ta.wpfloat,
+    wp_eps: ta.wpfloat,
 ) -> fa.CellKField[ta.wpfloat]:
     p_m = neighbor_sum(maximum(0.0, p_mflx_tracer_h(C2E) * geofac_div * p_dtime), axis=C2EDim)
-    r_m = minimum(1.0, (p_cc * p_rhodz_now) / (p_m + dbl_eps))
+    r_m = minimum(1.0, (p_cc * p_rhodz_now) / (p_m + wp_eps))
     return r_m
 
 
@@ -35,7 +35,7 @@ def compute_positive_definite_horizontal_multiplicative_flux_factor(
     p_mflx_tracer_h: fa.EdgeKField[ta.wpfloat],
     r_m: fa.CellKField[ta.wpfloat],
     p_dtime: ta.wpfloat,
-    dbl_eps: ta.wpfloat,
+    wp_eps: ta.wpfloat,
     horizontal_start: gtx.int32,
     horizontal_end: gtx.int32,
     vertical_start: gtx.int32,
@@ -47,7 +47,7 @@ def compute_positive_definite_horizontal_multiplicative_flux_factor(
         p_rhodz_now,
         p_mflx_tracer_h,
         p_dtime,
-        dbl_eps,
+        wp_eps,
         out=r_m,
         domain={
             dims.CellDim: (horizontal_start, horizontal_end),

diff --git a/...vection/src/icon4py/model/atmosphere/advection/stencils/compute_ppm4gpu_courant_number.py b/...vection/src/icon4py/model/atmosphere/advection/stencils/compute_ppm4gpu_courant_number.py
@@ -9,8 +9,9 @@
 import gt4py.next as gtx
 from gt4py.next import abs, where  # noqa: A004
 
-from icon4py.model.common import dimension as dims, field_type_aliases as fa, type_alias as ta
+from icon4py.model.common import dimension as dims, field_type_aliases as fa
 from icon4py.model.common.dimension import Koff
+from icon4py.model.common.type_alias import wpfloat
 
 
 # TODO(dastrm): this stencil has no test
@@ -19,13 +20,13 @@
 
 @gtx.field_operator
 def _compute_courant_number_below(
-    p_cellmass_now: fa.CellKField[ta.wpfloat],
-    z_mass: fa.CellKField[ta.wpfloat],
-    z_cfl: fa.CellKField[ta.wpfloat],
+    p_cellmass_now: fa.CellKField[wpfloat],
+    z_mass: fa.CellKField[wpfloat],
+    z_cfl: fa.CellKField[wpfloat],
     k: fa.KField[gtx.int32],
     nlev: gtx.int32,
-    dbl_eps: ta.wpfloat,
-) -> fa.CellKField[ta.wpfloat]:
+    wp_eps: wpfloat,
+) -> fa.CellKField[wpfloat]:
     z_mass_pos = z_mass > 0.0
 
     in_bounds_p0 = k <= nlev - 1
@@ -63,20 +64,20 @@ def _compute_courant_number_below(
     p_cellmass_now_jks = where(mass_gt_cellmass_p3, p_cellmass_now(Koff[4]), p_cellmass_now_jks)
 
     z_cflfrac = where(z_mass_pos, z_mass / p_cellmass_now_jks, 0.0)
-    z_cfl = z_cfl + where(z_cflfrac < 1.0, z_cflfrac, 1.0 - dbl_eps)
+    z_cfl = z_cfl + where(z_cflfrac < 1.0, z_cflfrac, 1.0 - wp_eps)
 
     return z_cfl
 
 
 @gtx.field_operator
 def _compute_courant_number_above(
-    p_cellmass_now: fa.CellKField[ta.wpfloat],
-    z_mass: fa.CellKField[ta.wpfloat],
-    z_cfl: fa.CellKField[ta.wpfloat],
+    p_cellmass_now: fa.CellKField[wpfloat],
+    z_mass: fa.CellKField[wpfloat],
+    z_cfl: fa.CellKField[wpfloat],
     k: fa.KField[gtx.int32],
     slevp1_ti: gtx.int32,
-    dbl_eps: ta.wpfloat,
-) -> fa.CellKField[ta.wpfloat]:
+    wp_eps: wpfloat,
+) -> fa.CellKField[wpfloat]:
     z_mass_neg = z_mass <= 0.0
 
     in_bounds_m0 = k >= slevp1_ti + 1
@@ -116,26 +117,26 @@ def _compute_courant_number_above(
     z_cfl = z_cfl - where(mass_gt_cellmass_m3, 1.0, 0.0)
 
     z_cflfrac = where(z_mass_neg, z_mass / p_cellmass_now_jks, 0.0)
-    z_cfl = z_cfl + where(abs(z_cflfrac) < 1.0, z_cflfrac, dbl_eps - 1.0)
+    z_cfl = z_cfl + where(abs(z_cflfrac) < 1.0, z_cflfrac, wp_eps - 1.0)
 
     return z_cfl
 
 
 @gtx.field_operator
 def _compute_ppm4gpu_courant_number(
-    p_mflx_contra_v: fa.CellKField[ta.wpfloat],
-    p_cellmass_now: fa.CellKField[ta.wpfloat],
-    z_cfl: fa.CellKField[ta.wpfloat],
+    p_mflx_contra_v: fa.CellKField[wpfloat],
+    p_cellmass_now: fa.CellKField[wpfloat],
+    z_cfl: fa.CellKField[wpfloat],
     k: fa.KField[gtx.int32],
     slevp1_ti: gtx.int32,
     nlev: gtx.int32,
-    dbl_eps: ta.wpfloat,
-    p_dtime: ta.wpfloat,
-) -> fa.CellKField[ta.wpfloat]:
+    wp_eps: wpfloat,
+    p_dtime: wpfloat,
+) -> fa.CellKField[wpfloat]:
     z_mass = p_dtime * p_mflx_contra_v
 
-    cfl_below = _compute_courant_number_below(p_cellmass_now, z_mass, z_cfl, k, nlev, dbl_eps)
-    cfl_above = _compute_courant_number_above(p_cellmass_now, z_mass, z_cfl, k, slevp1_ti, dbl_eps)
+    cfl_below = _compute_courant_number_below(p_cellmass_now, z_mass, z_cfl, k, nlev, wp_eps)
+    cfl_above = _compute_courant_number_above(p_cellmass_now, z_mass, z_cfl, k, slevp1_ti, wp_eps)
 
     z_cfl = cfl_below + cfl_above
 
@@ -144,14 +145,14 @@ def _compute_ppm4gpu_courant_number(
 
 @gtx.program(grid_type=gtx.GridType.UNSTRUCTURED)
 def compute_ppm4gpu_courant_number(
-    p_mflx_contra_v: fa.CellKField[ta.wpfloat],
-    p_cellmass_now: fa.CellKField[ta.wpfloat],
-    z_cfl: fa.CellKField[ta.wpfloat],
+    p_mflx_contra_v: fa.CellKField[wpfloat],
+    p_cellmass_now: fa.CellKField[wpfloat],
+    z_cfl: fa.CellKField[wpfloat],
     k: fa.KField[gtx.int32],
     slevp1_ti: gtx.int32,
     nlev: gtx.int32,
-    dbl_eps: ta.wpfloat,
-    p_dtime: ta.wpfloat,
+    wp_eps: wpfloat,
+    p_dtime: wpfloat,
     horizontal_start: gtx.int32,
     horizontal_end: gtx.int32,
     vertical_start: gtx.int32,
@@ -164,7 +165,7 @@ def compute_ppm4gpu_courant_number(
         k,
         slevp1_ti,
         nlev,
-        dbl_eps,
+        wp_eps,
         p_dtime,
         out=z_cfl,
         domain={

diff --git a/...el/atmosphere/advection/stencils/prepare_numerical_quadrature_for_cubic_reconstruction.py b/...el/atmosphere/advection/stencils/prepare_numerical_quadrature_for_cubic_reconstruction.py
@@ -51,7 +51,7 @@ def _prepare_numerical_quadrature_for_cubic_reconstruction(
     wgt_zeta_2: ta.wpfloat,
     wgt_eta_1: ta.wpfloat,
     wgt_eta_2: ta.wpfloat,
-    dbl_eps: ta.wpfloat,
+    wp_eps: ta.wpfloat,
     eps: ta.wpfloat,
 ) -> tuple[
     fa.EdgeKField[ta.vpfloat],
@@ -97,7 +97,7 @@ def _prepare_numerical_quadrature_for_cubic_reconstruction(
     p_coords_dreg_v_3_y_wp = astype(p_coords_dreg_v_3_y, wpfloat)
     p_coords_dreg_v_4_y_wp = astype(p_coords_dreg_v_4_y, wpfloat)
 
-    wgt_t_detjac_1 = dbl_eps + z_wgt_1 * (
+    wgt_t_detjac_1 = wp_eps + z_wgt_1 * (
         (
             z_eta_1_1 * (p_coords_dreg_v_2_x_wp - p_coords_dreg_v_1_x_wp)
             + z_eta_1_2 * (p_coords_dreg_v_3_x_wp - p_coords_dreg_v_4_x_wp)
@@ -115,7 +115,7 @@ def _prepare_numerical_quadrature_for_cubic_reconstruction(
             - z_eta_1_4 * (p_coords_dreg_v_2_x_wp - p_coords_dreg_v_3_x_wp)
         )
     )
-    wgt_t_detjac_2 = dbl_eps + z_wgt_2 * (
+    wgt_t_detjac_2 = wp_eps + z_wgt_2 * (
         (
             z_eta_2_1 * (p_coords_dreg_v_2_x_wp - p_coords_dreg_v_1_x_wp)
             + z_eta_2_2 * (p_coords_dreg_v_3_x_wp - p_coords_dreg_v_4_x_wp)
@@ -133,7 +133,7 @@ def _prepare_numerical_quadrature_for_cubic_reconstruction(
             - z_eta_2_4 * (p_coords_dreg_v_2_x_wp - p_coords_dreg_v_3_x_wp)
         )
     )
-    wgt_t_detjac_3 = dbl_eps + z_wgt_3 * (
+    wgt_t_detjac_3 = wp_eps + z_wgt_3 * (
         (
             z_eta_3_1 * (p_coords_dreg_v_2_x_wp - p_coords_dreg_v_1_x_wp)
             + z_eta_3_2 * (p_coords_dreg_v_3_x_wp - p_coords_dreg_v_4_x_wp)
@@ -151,7 +151,7 @@ def _prepare_numerical_quadrature_for_cubic_reconstruction(
             - z_eta_3_4 * (p_coords_dreg_v_2_x_wp - p_coords_dreg_v_3_x_wp)
         )
     )
-    wgt_t_detjac_4 = dbl_eps + z_wgt_4 * (
+    wgt_t_detjac_4 = wp_eps + z_wgt_4 * (
         (
             z_eta_4_1 * (p_coords_dreg_v_2_x_wp - p_coords_dreg_v_1_x_wp)
             + z_eta_4_2 * (p_coords_dreg_v_3_x_wp - p_coords_dreg_v_4_x_wp)
@@ -342,7 +342,7 @@ def prepare_numerical_quadrature_for_cubic_reconstruction(
     wgt_zeta_2: ta.wpfloat,
     wgt_eta_1: ta.wpfloat,
     wgt_eta_2: ta.wpfloat,
-    dbl_eps: ta.wpfloat,
+    wp_eps: ta.wpfloat,
     eps: ta.wpfloat,
     horizontal_start: gtx.int32,
     horizontal_end: gtx.int32,
@@ -386,7 +386,7 @@ def prepare_numerical_quadrature_for_cubic_reconstruction(
         wgt_zeta_2,
         wgt_eta_1,
         wgt_eta_2,
-        dbl_eps,
+        wp_eps,
         eps,
         out=(
             p_quad_vector_sum_1,