From 4f514dbd70c89e3bae03a59f1dc9837acf25885c Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 6 Aug 2025 17:20:04 +0000 Subject: [PATCH] Rewrite byte_extract from multi-dimensional array As a follow-up to 78839a978: add support for rewriting multiple-of-element size access to arrays when working with multi-dimensional arrays. Fixes: #8796 --- regression/cbmc/Pointer_array8/test.desc | 2 +- .../main.c | 122 ++++++++++++++++++ .../program.desc | 8 ++ .../test.desc | 8 ++ src/util/pointer_offset_size.cpp | 37 ++++-- 5 files changed, 168 insertions(+), 9 deletions(-) create mode 100644 regression/contracts-dfcc/multi_dimensional_array_performance/main.c create mode 100644 regression/contracts-dfcc/multi_dimensional_array_performance/program.desc create mode 100644 regression/contracts-dfcc/multi_dimensional_array_performance/test.desc diff --git a/regression/cbmc/Pointer_array8/test.desc b/regression/cbmc/Pointer_array8/test.desc index 1c039664a91..eba84106901 100644 --- a/regression/cbmc/Pointer_array8/test.desc +++ b/regression/cbmc/Pointer_array8/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE no-new-smt broken-cprover-smt-backend main.c ^EXIT=0$ diff --git a/regression/contracts-dfcc/multi_dimensional_array_performance/main.c b/regression/contracts-dfcc/multi_dimensional_array_performance/main.c new file mode 100644 index 00000000000..fc2cf4ec475 --- /dev/null +++ b/regression/contracts-dfcc/multi_dimensional_array_performance/main.c @@ -0,0 +1,122 @@ +#include + +#define __contract__(x) x +#define __loop__(x) x + +/* https://diffblue.github.io/cbmc/contracts-assigns.html */ +#define assigns(...) __CPROVER_assigns(__VA_ARGS__) + +/* https://diffblue.github.io/cbmc/contracts-requires-ensures.html */ +#define requires(...) __CPROVER_requires(__VA_ARGS__) +#define ensures(...) __CPROVER_ensures(__VA_ARGS__) +/* https://diffblue.github.io/cbmc/contracts-loops.html */ +#define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) + +#define memory_slice(...) __CPROVER_object_upto(__VA_ARGS__) +#define memory_no_alias(...) __CPROVER_is_fresh(__VA_ARGS__) + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define forall(qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_forall \ + { \ + unsigned qvar; \ + ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate) \ + } + +/* clang-format on */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define CBMC_CONCAT_(left, right) left##right +#define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right) + +#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var, \ + value_lb, value_ub) \ + __CPROVER_forall \ + { \ + unsigned qvar; \ + ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ + } + +#define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ + array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \ + (qvar_ub), (array_var), (value_lb), (value_ub)) +/* clang-format on */ + +#define MLDSA_K 2 +#define MLDSA_L 1 +#define MLDSA_N 16 +#define MLDSA_Q 8380417 + +typedef struct +{ + int32_t coeffs[MLDSA_N]; +} mld_poly; + +typedef struct +{ + mld_poly vec[MLDSA_L]; +} mld_polyvecl; + +typedef struct +{ + mld_polyvecl vec[MLDSA_K]; +} mld_polymat; + +/* clang-format off */ +void mld_poly_permute_bitrev_to_custom(int32_t p[MLDSA_N]) +__contract__( + requires(memory_no_alias(p, sizeof(int32_t) * MLDSA_N)) + requires(array_bound(p, 0, MLDSA_N, 0, MLDSA_Q)) + assigns(memory_slice(p, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(p, 0, MLDSA_N, 0, MLDSA_Q)) +); + +void mld_polymat_permute_bitrev_to_custom(mld_polymat *mat) +__contract__( + requires(memory_no_alias(mat, sizeof(mld_polymat))) + requires(forall(k1, 0, MLDSA_K, forall(l1, 0, MLDSA_L, + array_bound(mat->vec[k1].vec[l1].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))) + assigns(memory_slice(mat, sizeof(mld_polymat))) + ensures(forall(k1, 0, MLDSA_K, forall(l1, 0, MLDSA_L, + array_bound(mat->vec[k1].vec[l1].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))) +); + +void mld_polymat_permute_bitrev_to_custom(mld_polymat *mat) +{ + unsigned int i, j; + for (i = 0; i < MLDSA_K; i++) + __loop__( + assigns(i, j, memory_slice(mat, sizeof(mld_polymat))) + invariant(i <= MLDSA_K) + invariant(forall(k1, 0, MLDSA_K, forall(l1, 0, MLDSA_L, + array_bound(mat->vec[k1].vec[l1].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))) + ) + { + for (j = 0; j < MLDSA_L; j++) + __loop__( + assigns(j, memory_slice(mat, sizeof(mld_polymat))) + invariant(i <= MLDSA_K) + invariant(j <= MLDSA_L) + invariant(forall(k2, 0, MLDSA_K, forall(l2, 0, MLDSA_L, + array_bound(mat->vec[k2].vec[l2].coeffs, 0, MLDSA_N, 0, MLDSA_Q)))) + ) + { + mld_poly_permute_bitrev_to_custom(mat->vec[i].vec[j].coeffs); + } + } +} +/* clang-format on */ + +int main() +{ + mld_polymat *m; + mld_polymat_permute_bitrev_to_custom(m); +} diff --git a/regression/contracts-dfcc/multi_dimensional_array_performance/program.desc b/regression/contracts-dfcc/multi_dimensional_array_performance/program.desc new file mode 100644 index 00000000000..95a3dc178df --- /dev/null +++ b/regression/contracts-dfcc/multi_dimensional_array_performance/program.desc @@ -0,0 +1,8 @@ +CORE dfcc-only +main.c +--dfcc main --enforce-contract mld_polymat_permute_bitrev_to_custom --replace-call-with-contract mld_poly_permute_bitrev_to_custom --apply-loop-contracts _ --slice-formula --no-array-field-sensitivity --program-only +^EXIT=0$ +^SIGNAL=0$ +-- +byte_extract_(big|little)_endian\([^,]*, [^,]+\* 4[^,]*, signed int\) +-- diff --git a/regression/contracts-dfcc/multi_dimensional_array_performance/test.desc b/regression/contracts-dfcc/multi_dimensional_array_performance/test.desc new file mode 100644 index 00000000000..93e1e188e47 --- /dev/null +++ b/regression/contracts-dfcc/multi_dimensional_array_performance/test.desc @@ -0,0 +1,8 @@ +CORE dfcc-only +main.c +--dfcc main --enforce-contract mld_polymat_permute_bitrev_to_custom --replace-call-with-contract mld_poly_permute_bitrev_to_custom --apply-loop-contracts _ --slice-formula --no-array-field-sensitivity +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +-- diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index 56e170fab18..ad93f3d7da3 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -722,6 +722,9 @@ std::optional get_subexpression_at_offset( return {}; } + const mp_integer elem_size_bytes = + *elem_size_bits / config.ansi_c.char_width; + // If we have an offset C + x (where C is a constant) we can try to // recurse by first looking at the member at offset C. if( @@ -766,14 +769,12 @@ std::optional get_subexpression_at_offset( const exprt &other_factor = mul.op0().is_constant() ? mul.op1() : mul.op0(); - if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0) + if(const_factor % elem_size_bytes != 0) return {}; exprt index = mult_exprt{ other_factor, - from_integer( - const_factor / (*elem_size_bits / config.ansi_c.char_width), - other_factor.type())}; + from_integer(const_factor / elem_size_bytes, other_factor.type())}; return get_subexpression_at_offset( index_exprt{ @@ -798,14 +799,34 @@ std::optional get_subexpression_at_offset( const exprt &other_factor = offset_mult.op0().is_constant() ? offset_mult.op1() : offset_mult.op0(); - if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0) + if( + target_size_bits < elem_size_bits && const_factor < elem_size_bytes && + elem_size_bytes % const_factor == 0) + { + // this assumes that the index is within bounds of this dimension of the + // array, else we end up going down the wrong access path + const mp_integer index_divider = elem_size_bytes / const_factor; + exprt index = div_exprt{ + other_factor, from_integer(index_divider, other_factor.type())}; + exprt remaining_offset = mult_exprt{ + mod_exprt{ + other_factor, from_integer(index_divider, other_factor.type())}, + from_integer(const_factor, other_factor.type())}; + + return get_subexpression_at_offset( + index_exprt{ + expr, + typecast_exprt::conditional_cast(index, array_type->index_type())}, + remaining_offset, + target_type, + ns); + } + else if(const_factor % elem_size_bytes != 0) return {}; exprt index = mult_exprt{ other_factor, - from_integer( - const_factor / (*elem_size_bits / config.ansi_c.char_width), - other_factor.type())}; + from_integer(const_factor / elem_size_bytes, other_factor.type())}; return get_subexpression_at_offset( index_exprt{