From 64643b09c9132842ae1d49b893b35d029d2fdad6 Mon Sep 17 00:00:00 2001 From: Andrew Kern Date: Wed, 17 Dec 2025 14:26:59 -0800 Subject: [PATCH 1/6] Add SLEEF acceleration for trigonometric functions Extend SLEEF coverage to include sin, cos, tan, asin, acos, atan, and atan2 functions in Eidos. These functions now use SLEEF vectorization when operating on float arrays, with scalar fallback for integer inputs. Changes: - sleef_config.h: Add SLEEF macros for trig functions (AVX2 + ARM NEON) - eidos_simd.h: Add SIMD wrapper functions for all 7 trig functions - eidos_functions_math.cpp: Update trig functions to use SIMD path - eidos_test_functions_math.cpp: Add C++ level tests comparing SLEEF output against scalar std:: functions for numerical correctness SLEEF functions now accelerated: exp, log, log10, log2, sin, cos, tan, asin, acos, atan, atan2 (11 total, up from 4) --- eidos/eidos_functions_math.cpp | 214 ++++++++++++++++++++-------- eidos/eidos_simd.h | 148 +++++++++++++++++++ eidos/eidos_test.cpp | 5 +- eidos/eidos_test.h | 4 + eidos/eidos_test_functions_math.cpp | 205 ++++++++++++++++++++++++++ eidos/sleef/sleef_config.h | 18 +++ 6 files changed, 534 insertions(+), 60 deletions(-) diff --git a/eidos/eidos_functions_math.cpp b/eidos/eidos_functions_math.cpp index cd8d1ecf..4ae02016 100644 --- a/eidos/eidos_functions_math.cpp +++ b/eidos/eidos_functions_math.cpp @@ -109,17 +109,31 @@ EidosValue_SP Eidos_ExecuteFunction_abs(const std::vector &p_argu EidosValue_SP Eidos_ExecuteFunction_acos(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); + EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); - result_SP = EidosValue_SP(float_result); - - for (int value_index = 0; value_index < x_count; ++value_index) - float_result->set_float_no_check(acos(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); - + + if (x_type == EidosValueType::kValueInt) + { + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + result_SP = EidosValue_SP(float_result); + + for (int value_index = 0; value_index < x_count; ++value_index) + float_result->set_float_no_check(acos(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); + } + else if (x_type == EidosValueType::kValueFloat) + { + const double *float_data = x_value->FloatData(); + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + double *float_result_data = float_result->data_mutable(); + result_SP = EidosValue_SP(float_result); + + Eidos_SIMD::acos_float64(float_data, float_result_data, x_count); + } + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -127,17 +141,31 @@ EidosValue_SP Eidos_ExecuteFunction_acos(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_asin(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); + EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); - result_SP = EidosValue_SP(float_result); - - for (int value_index = 0; value_index < x_count; ++value_index) - float_result->set_float_no_check(asin(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); - + + if (x_type == EidosValueType::kValueInt) + { + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + result_SP = EidosValue_SP(float_result); + + for (int value_index = 0; value_index < x_count; ++value_index) + float_result->set_float_no_check(asin(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); + } + else if (x_type == EidosValueType::kValueFloat) + { + const double *float_data = x_value->FloatData(); + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + double *float_result_data = float_result->data_mutable(); + result_SP = EidosValue_SP(float_result); + + Eidos_SIMD::asin_float64(float_data, float_result_data, x_count); + } + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -145,17 +173,31 @@ EidosValue_SP Eidos_ExecuteFunction_asin(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_atan(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); + EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); - result_SP = EidosValue_SP(float_result); - - for (int value_index = 0; value_index < x_count; ++value_index) - float_result->set_float_no_check(atan(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); - + + if (x_type == EidosValueType::kValueInt) + { + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + result_SP = EidosValue_SP(float_result); + + for (int value_index = 0; value_index < x_count; ++value_index) + float_result->set_float_no_check(atan(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); + } + else if (x_type == EidosValueType::kValueFloat) + { + const double *float_data = x_value->FloatData(); + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + double *float_result_data = float_result->data_mutable(); + result_SP = EidosValue_SP(float_result); + + Eidos_SIMD::atan_float64(float_data, float_result_data, x_count); + } + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -163,32 +205,46 @@ EidosValue_SP Eidos_ExecuteFunction_atan(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_atan2(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); int x_count = x_value->Count(); + EidosValueType x_type = x_value->Type(); EidosValue *y_value = p_arguments[1].get(); int y_count = y_value->Count(); - + EidosValueType y_type = y_value->Type(); + if (x_count != y_count) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_atan2): function atan2() requires arguments of equal length." << EidosTerminate(nullptr); - + // matrices/arrays must be conformable, and we need to decide here which operand's dimensionality will be used for the result int x_dimcount = x_value->DimensionCount(); int y_dimcount = y_value->DimensionCount(); EidosValue_SP result_dim_source(EidosValue::BinaryOperationDimensionSource(x_value, y_value)); - + if ((x_dimcount > 1) && (y_dimcount > 1) && !EidosValue::MatchingDimensions(x_value, y_value)) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_atan2): non-conformable array operands in atan2()." << EidosTerminate(nullptr); - + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - - for (int value_index = 0; value_index < x_count; ++value_index) - float_result->set_float_no_check(atan2(x_value->NumericAtIndex_NOCAST(value_index, nullptr), y_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); - + + // Use SIMD when both operands are float type + if ((x_type == EidosValueType::kValueFloat) && (y_type == EidosValueType::kValueFloat)) + { + const double *x_data = x_value->FloatData(); + const double *y_data = y_value->FloatData(); + double *float_result_data = float_result->data_mutable(); + + Eidos_SIMD::atan2_float64(x_data, y_data, float_result_data, x_count); + } + else + { + for (int value_index = 0; value_index < x_count; ++value_index) + float_result->set_float_no_check(atan2(x_value->NumericAtIndex_NOCAST(value_index, nullptr), y_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); + } + // Copy dimensions from whichever operand we chose at the beginning result_SP->CopyDimensionsFromValue(result_dim_source.get()); - + return result_SP; } @@ -223,17 +279,31 @@ EidosValue_SP Eidos_ExecuteFunction_ceil(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_cos(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); + EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); - result_SP = EidosValue_SP(float_result); - - for (int value_index = 0; value_index < x_count; ++value_index) - float_result->set_float_no_check(cos(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); - + + if (x_type == EidosValueType::kValueInt) + { + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + result_SP = EidosValue_SP(float_result); + + for (int value_index = 0; value_index < x_count; ++value_index) + float_result->set_float_no_check(cos(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); + } + else if (x_type == EidosValueType::kValueFloat) + { + const double *float_data = x_value->FloatData(); + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + double *float_result_data = float_result->data_mutable(); + result_SP = EidosValue_SP(float_result); + + Eidos_SIMD::cos_float64(float_data, float_result_data, x_count); + } + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -2427,17 +2497,31 @@ EidosValue_SP Eidos_ExecuteFunction_sign(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_sin(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); + EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); - result_SP = EidosValue_SP(float_result); - - for (int value_index = 0; value_index < x_count; ++value_index) - float_result->set_float_no_check(sin(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); - + + if (x_type == EidosValueType::kValueInt) + { + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + result_SP = EidosValue_SP(float_result); + + for (int value_index = 0; value_index < x_count; ++value_index) + float_result->set_float_no_check(sin(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); + } + else if (x_type == EidosValueType::kValueFloat) + { + const double *float_data = x_value->FloatData(); + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + double *float_result_data = float_result->data_mutable(); + result_SP = EidosValue_SP(float_result); + + Eidos_SIMD::sin_float64(float_data, float_result_data, x_count); + } + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -2617,17 +2701,31 @@ EidosValue_SP Eidos_ExecuteFunction_sumExact(const std::vector &p EidosValue_SP Eidos_ExecuteFunction_tan(const std::vector &p_arguments, EidosInterpreter __attribute__((unused)) &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); + EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); - result_SP = EidosValue_SP(float_result); - - for (int value_index = 0; value_index < x_count; ++value_index) - float_result->set_float_no_check(tan(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); - + + if (x_type == EidosValueType::kValueInt) + { + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + result_SP = EidosValue_SP(float_result); + + for (int value_index = 0; value_index < x_count; ++value_index) + float_result->set_float_no_check(tan(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); + } + else if (x_type == EidosValueType::kValueFloat) + { + const double *float_data = x_value->FloatData(); + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); + double *float_result_data = float_result->data_mutable(); + result_SP = EidosValue_SP(float_result); + + Eidos_SIMD::tan_float64(float_data, float_result_data, x_count); + } + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } diff --git a/eidos/eidos_simd.h b/eidos/eidos_simd.h index 4c142dd3..9cd0c8af 100644 --- a/eidos/eidos_simd.h +++ b/eidos/eidos_simd.h @@ -363,6 +363,154 @@ inline void log2_float64(const double *input, double *output, int64_t count) output[i] = std::log2(input[i]); } +// --------------------- +// Sine: sin(x) +// --------------------- +inline void sin_float64(const double *input, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D v = EIDOS_SLEEF_LOAD_D(&input[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_SIN_D(v); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::sin(input[i]); +} + +// --------------------- +// Cosine: cos(x) +// --------------------- +inline void cos_float64(const double *input, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D v = EIDOS_SLEEF_LOAD_D(&input[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_COS_D(v); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::cos(input[i]); +} + +// --------------------- +// Tangent: tan(x) +// --------------------- +inline void tan_float64(const double *input, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D v = EIDOS_SLEEF_LOAD_D(&input[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_TAN_D(v); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::tan(input[i]); +} + +// --------------------- +// Arc Sine: asin(x) +// --------------------- +inline void asin_float64(const double *input, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D v = EIDOS_SLEEF_LOAD_D(&input[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_ASIN_D(v); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::asin(input[i]); +} + +// --------------------- +// Arc Cosine: acos(x) +// --------------------- +inline void acos_float64(const double *input, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D v = EIDOS_SLEEF_LOAD_D(&input[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_ACOS_D(v); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::acos(input[i]); +} + +// --------------------- +// Arc Tangent: atan(x) +// --------------------- +inline void atan_float64(const double *input, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D v = EIDOS_SLEEF_LOAD_D(&input[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_ATAN_D(v); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::atan(input[i]); +} + +// --------------------- +// Arc Tangent 2: atan2(y, x) +// --------------------- +inline void atan2_float64(const double *y, const double *x, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D vy = EIDOS_SLEEF_LOAD_D(&y[i]); + EIDOS_SLEEF_TYPE_D vx = EIDOS_SLEEF_LOAD_D(&x[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_ATAN2_D(vy, vx); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::atan2(y[i], x[i]); +} + // ================================ // Reductions // ================================ diff --git a/eidos/eidos_test.cpp b/eidos/eidos_test.cpp index 11d458a2..ea05d36e 100644 --- a/eidos/eidos_test.cpp +++ b/eidos/eidos_test.cpp @@ -47,8 +47,8 @@ // Keeping records of test success / failure -static int gEidosTestSuccessCount = 0; -static int gEidosTestFailureCount = 0; +int gEidosTestSuccessCount = 0; +int gEidosTestFailureCount = 0; // Instantiates and runs the script, and prints an error if the result does not match expectations @@ -358,6 +358,7 @@ int RunEidosTests(void) _RunFunctionMathTests_setUnionIntersection(); _RunFunctionMathTests_setDifferenceSymmetricDifference(); _RunFunctionMathTests_s_through_z(); + _RunSIMDMathTests(); _RunFunctionMatrixArrayTests(); _RunFunctionStatisticsTests_a_through_p(); _RunFunctionStatisticsTests_q_through_z(); diff --git a/eidos/eidos_test.h b/eidos/eidos_test.h index b4290c63..ed386a73 100644 --- a/eidos/eidos_test.h +++ b/eidos/eidos_test.h @@ -34,6 +34,9 @@ int RunEidosTests(void); +// Test success/failure counters (used by C++-level tests) +extern int gEidosTestSuccessCount; +extern int gEidosTestFailureCount; // Can turn on escape sequences to color test output; at present we turn these on for the command-line // tools and off for the GUI tools, since Terminal supports these codes but Xcode does not. @@ -110,6 +113,7 @@ extern void _RunFunctionMathTests_g_through_r(void); extern void _RunFunctionMathTests_setUnionIntersection(void); extern void _RunFunctionMathTests_setDifferenceSymmetricDifference(void); extern void _RunFunctionMathTests_s_through_z(void); +extern void _RunSIMDMathTests(void); extern void _RunFunctionMatrixArrayTests(void); extern void _RunFunctionStatisticsTests_a_through_p(void); extern void _RunFunctionStatisticsTests_q_through_z(void); diff --git a/eidos/eidos_test_functions_math.cpp b/eidos/eidos_test_functions_math.cpp index e8c80c3b..ca0f6552 100644 --- a/eidos/eidos_test_functions_math.cpp +++ b/eidos/eidos_test_functions_math.cpp @@ -1194,6 +1194,211 @@ void _RunFunctionMathTests_s_through_z(void) } +#pragma mark - +#pragma mark SIMD math wrapper tests +#pragma mark - + +// SIMD wrapper tests - verify that SLEEF-accelerated functions produce +// results equivalent to scalar std:: functions + +#include "eidos_simd.h" +#include "eidos_globals.h" + +// Test a unary SIMD function against its scalar equivalent +// Template for functions like sin, cos, exp, etc. +template +static void _TestUnarySimdFunction(const char *name, ScalarFunc scalar_func, SimdFunc simd_func, + const double *test_values, int num_values, double tolerance = 1e-14) +{ + // Allocate output arrays + std::vector scalar_output(num_values); + std::vector simd_output(num_values); + + // Compute scalar results + for (int i = 0; i < num_values; i++) + scalar_output[i] = scalar_func(test_values[i]); + + // Compute SIMD results + simd_func(test_values, simd_output.data(), num_values); + + // Compare results + bool all_match = true; + for (int i = 0; i < num_values; i++) + { + double diff = std::abs(simd_output[i] - scalar_output[i]); + // Handle NaN comparisons + if (std::isnan(scalar_output[i]) && std::isnan(simd_output[i])) + continue; + // Handle Inf comparisons + if (std::isinf(scalar_output[i]) && std::isinf(simd_output[i]) && + (scalar_output[i] > 0) == (simd_output[i] > 0)) + continue; + // Compare with relative tolerance for large values + double max_val = std::max(std::abs(scalar_output[i]), std::abs(simd_output[i])); + if (max_val > 1.0) + diff /= max_val; + if (diff > tolerance) + { + all_match = false; + std::cerr << "SIMD " << name << " mismatch at index " << i << ": input=" << test_values[i] + << ", scalar=" << scalar_output[i] << ", simd=" << simd_output[i] + << ", diff=" << diff << std::endl; + } + } + + if (all_match) + { + gEidosTestSuccessCount++; + } + else + { + gEidosTestFailureCount++; + std::cerr << EIDOS_OUTPUT_FAILURE_TAG << " : SIMD " << name << "() test failed" << std::endl; + } +} + +// Test atan2 SIMD function (binary operation) +static void _TestAtan2SimdFunction(const double *y_values, const double *x_values, + int num_values, double tolerance = 1e-14) +{ + std::vector scalar_output(num_values); + std::vector simd_output(num_values); + + // Compute scalar results + for (int i = 0; i < num_values; i++) + scalar_output[i] = std::atan2(y_values[i], x_values[i]); + + // Compute SIMD results + Eidos_SIMD::atan2_float64(y_values, x_values, simd_output.data(), num_values); + + // Compare results + bool all_match = true; + for (int i = 0; i < num_values; i++) + { + double diff = std::abs(simd_output[i] - scalar_output[i]); + if (std::isnan(scalar_output[i]) && std::isnan(simd_output[i])) + continue; + double max_val = std::max(std::abs(scalar_output[i]), std::abs(simd_output[i])); + if (max_val > 1.0) + diff /= max_val; + if (diff > tolerance) + { + all_match = false; + std::cerr << "SIMD atan2 mismatch at index " << i << ": y=" << y_values[i] + << ", x=" << x_values[i] << ", scalar=" << scalar_output[i] + << ", simd=" << simd_output[i] << ", diff=" << diff << std::endl; + } + } + + if (all_match) + { + gEidosTestSuccessCount++; + } + else + { + gEidosTestFailureCount++; + std::cerr << EIDOS_OUTPUT_FAILURE_TAG << " : SIMD atan2() test failed" << std::endl; + } +} + +void _RunSIMDMathTests(void) +{ +#if !defined(EIDOS_SLEEF_AVAILABLE) || !EIDOS_SLEEF_AVAILABLE + std::cout << "NOTE: SLEEF is not available in this build; SIMD math tests will compare scalar fallback against std:: (trivially identical)." << std::endl; +#endif + + // Test values for trigonometric functions (in radians) + // Include various edge cases and values that span multiple SIMD vector widths + const double trig_test_values[] = { + // Basic values + 0.0, 0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, + // Negative values + -0.1, -0.5, -1.0, -1.5, -2.0, -2.5, -3.0, + // Special angles (multiples of pi/4, pi/2, pi) + M_PI/6, M_PI/4, M_PI/3, M_PI/2, 2*M_PI/3, 3*M_PI/4, M_PI, + -M_PI/6, -M_PI/4, -M_PI/3, -M_PI/2, -2*M_PI/3, -3*M_PI/4, -M_PI, + // Larger values to test range reduction + 5.0, 10.0, 100.0, -5.0, -10.0, -100.0, + // Values near boundaries + 1e-10, 1e-5, -1e-10, -1e-5, + // More values to ensure we test vector remainder handling (test odd counts) + 0.123, 0.456, 0.789, 1.234, 1.567, 1.890, 2.345 + }; + const int num_trig_values = sizeof(trig_test_values) / sizeof(double); + + // Test values for inverse trig functions (domain: [-1, 1] for asin/acos) + const double inv_trig_test_values[] = { + 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, + -0.1, -0.2, -0.3, -0.4, -0.5, -0.6, -0.7, -0.8, -0.9, -1.0, + 0.99, 0.999, -0.99, -0.999, + 0.01, 0.001, -0.01, -0.001 + }; + const int num_inv_trig_values = sizeof(inv_trig_test_values) / sizeof(double); + + // Test values for exp/log functions + const double exp_test_values[] = { + 0.0, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, + -0.1, -0.5, -1.0, -2.0, -5.0, -10.0, + 0.001, 0.01, -0.001, -0.01, + 700.0, -700.0 // Near overflow/underflow for exp + }; + const int num_exp_values = sizeof(exp_test_values) / sizeof(double); + + const double log_test_values[] = { + 0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0, 1000.0, + M_E, M_E * M_E, 1.0/M_E, + 1e-10, 1e10 + }; + const int num_log_values = sizeof(log_test_values) / sizeof(double); + + // Test y,x pairs for atan2 + const double atan2_y_values[] = { + 0.0, 1.0, 1.0, 0.0, -1.0, -1.0, -1.0, 0.0, 1.0, + 0.5, 0.5, -0.5, -0.5, 3.0, -3.0, 4.0, -4.0 + }; + const double atan2_x_values[] = { + 1.0, 0.0, 1.0, -1.0, 0.0, 1.0, -1.0, 1.0, -1.0, + 0.5, -0.5, 0.5, -0.5, 4.0, 4.0, -3.0, -3.0 + }; + const int num_atan2_values = sizeof(atan2_y_values) / sizeof(double); + + // Run tests for each SIMD function + // Trigonometric functions + _TestUnarySimdFunction("sin", [](double x) { return std::sin(x); }, + Eidos_SIMD::sin_float64, trig_test_values, num_trig_values); + + _TestUnarySimdFunction("cos", [](double x) { return std::cos(x); }, + Eidos_SIMD::cos_float64, trig_test_values, num_trig_values); + + _TestUnarySimdFunction("tan", [](double x) { return std::tan(x); }, + Eidos_SIMD::tan_float64, trig_test_values, num_trig_values, 1e-12); // tan needs more tolerance near poles + + // Inverse trigonometric functions + _TestUnarySimdFunction("asin", [](double x) { return std::asin(x); }, + Eidos_SIMD::asin_float64, inv_trig_test_values, num_inv_trig_values); + + _TestUnarySimdFunction("acos", [](double x) { return std::acos(x); }, + Eidos_SIMD::acos_float64, inv_trig_test_values, num_inv_trig_values); + + _TestUnarySimdFunction("atan", [](double x) { return std::atan(x); }, + Eidos_SIMD::atan_float64, trig_test_values, num_trig_values); // atan accepts any real value + + // atan2 (binary function) + _TestAtan2SimdFunction(atan2_y_values, atan2_x_values, num_atan2_values); + + // Exponential and logarithmic functions (these were already implemented, test them too) + _TestUnarySimdFunction("exp", [](double x) { return std::exp(x); }, + Eidos_SIMD::exp_float64, exp_test_values, num_exp_values); + + _TestUnarySimdFunction("log", [](double x) { return std::log(x); }, + Eidos_SIMD::log_float64, log_test_values, num_log_values); + + _TestUnarySimdFunction("log10", [](double x) { return std::log10(x); }, + Eidos_SIMD::log10_float64, log_test_values, num_log_values); + + _TestUnarySimdFunction("log2", [](double x) { return std::log2(x); }, + Eidos_SIMD::log2_float64, log_test_values, num_log_values); +} diff --git a/eidos/sleef/sleef_config.h b/eidos/sleef/sleef_config.h index 6844afbc..fee78879 100644 --- a/eidos/sleef/sleef_config.h +++ b/eidos/sleef/sleef_config.h @@ -62,6 +62,15 @@ #define EIDOS_SLEEF_LOG10_D(v) Sleef_log10d4_u10avx2(v) #define EIDOS_SLEEF_LOG2_D(v) Sleef_log2d4_u10avx2(v) + // Trigonometric functions (u10 = 1.0 ULP accuracy) + #define EIDOS_SLEEF_SIN_D(v) Sleef_sind4_u10avx2(v) + #define EIDOS_SLEEF_COS_D(v) Sleef_cosd4_u10avx2(v) + #define EIDOS_SLEEF_TAN_D(v) Sleef_tand4_u10avx2(v) + #define EIDOS_SLEEF_ASIN_D(v) Sleef_asind4_u10avx2(v) + #define EIDOS_SLEEF_ACOS_D(v) Sleef_acosd4_u10avx2(v) + #define EIDOS_SLEEF_ATAN_D(v) Sleef_atand4_u10avx2(v) + #define EIDOS_SLEEF_ATAN2_D(y,x) Sleef_atan2d4_u10avx2(y,x) + // Float (single-precision) support - 8 floats per AVX2 register #define EIDOS_SLEEF_FLOAT_AVAILABLE 1 #define EIDOS_SLEEF_VEC_SIZE_F 8 @@ -94,6 +103,15 @@ #define EIDOS_SLEEF_LOG10_D(v) Sleef_log10d2_u10advsimd(v) #define EIDOS_SLEEF_LOG2_D(v) Sleef_log2d2_u10advsimd(v) + // Trigonometric functions (u10 = 1.0 ULP accuracy) + #define EIDOS_SLEEF_SIN_D(v) Sleef_sind2_u10advsimd(v) + #define EIDOS_SLEEF_COS_D(v) Sleef_cosd2_u10advsimd(v) + #define EIDOS_SLEEF_TAN_D(v) Sleef_tand2_u10advsimd(v) + #define EIDOS_SLEEF_ASIN_D(v) Sleef_asind2_u10advsimd(v) + #define EIDOS_SLEEF_ACOS_D(v) Sleef_acosd2_u10advsimd(v) + #define EIDOS_SLEEF_ATAN_D(v) Sleef_atand2_u10advsimd(v) + #define EIDOS_SLEEF_ATAN2_D(y,x) Sleef_atan2d2_u10advsimd(y,x) + // Float (single-precision) support - 4 floats per NEON register #define EIDOS_SLEEF_FLOAT_AVAILABLE 1 #define EIDOS_SLEEF_VEC_SIZE_F 4 From 401937607e180eb297310816098efa10ec8dcbc9 Mon Sep 17 00:00:00 2001 From: Andrew Kern Date: Wed, 17 Dec 2025 15:29:54 -0800 Subject: [PATCH 2/6] Add SLEEF acceleration for pow() function - Add EIDOS_SLEEF_POW_D macro for AVX2 and ARM NEON in sleef_config.h - Add pow_float64(), pow_float64_scalar_exp(), and pow_float64_scalar_base() wrapper functions in eidos_simd.h - Update Eidos ^ operator in eidos_interpreter.cpp to use SLEEF pow - Update SpatialMap::power() in spatial_map.cpp to use SLEEF pow - Add C++ unit tests for pow functions in eidos_test_functions_math.cpp --- core/spatial_map.cpp | 12 ++- eidos/eidos_interpreter.cpp | 25 +++--- eidos/eidos_simd.h | 78 +++++++++++++++++ eidos/eidos_test_functions_math.cpp | 130 ++++++++++++++++++++++++++++ eidos/sleef/sleef_config.h | 6 ++ 5 files changed, 232 insertions(+), 19 deletions(-) diff --git a/core/spatial_map.cpp b/core/spatial_map.cpp index bb598d3a..cd1fafa1 100644 --- a/core/spatial_map.cpp +++ b/core/spatial_map.cpp @@ -1652,22 +1652,20 @@ EidosValue_SP SpatialMap::ExecuteMethod_power(EidosGlobalStringID p_method_id, c if ((x_value->Type() == EidosValueType::kValueInt) || (x_value->Type() == EidosValueType::kValueFloat)) { double power_scalar = x_value->NumericAtIndex_NOCAST(0, nullptr); - + // FIXME: TO BE PARALLELIZED - for (int64_t i = 0; i < values_size_; ++i) - values_[i] = pow(values_[i], power_scalar); + Eidos_SIMD::pow_float64_scalar_exp(values_, power_scalar, values_, values_size_); } else { SpatialMap *power_map = (SpatialMap *)x_value->ObjectElementAtIndex_NOCAST(0, nullptr); double *power_map_values = power_map->values_; - + if (!IsCompatibleWithMap(power_map)) EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_power): power() requires the target SpatialMap to be compatible with the SpatialMap supplied in x (using the same spatiality and bounds, and having the same grid resolution)." << EidosTerminate(); - + // FIXME: TO BE PARALLELIZED - for (int64_t i = 0; i < values_size_; ++i) - values_[i] = pow(values_[i], power_map_values[i]); + Eidos_SIMD::pow_float64(values_, power_map_values, values_, values_size_); } _ValuesChanged(); diff --git a/eidos/eidos_interpreter.cpp b/eidos/eidos_interpreter.cpp index e5a379a2..ae69e590 100644 --- a/eidos/eidos_interpreter.cpp +++ b/eidos/eidos_interpreter.cpp @@ -23,6 +23,7 @@ #include "eidos_ast_node.h" #include "eidos_rng.h" #include "eidos_call_signature.h" +#include "eidos_simd.h" #include #include @@ -3211,9 +3212,9 @@ EidosValue_SP EidosInterpreter::Evaluate_Exp(const EidosASTNode *p_node) { const double *first_child_data = first_child_value->FloatData(); const double *second_child_data = second_child_value->FloatData(); - - for (int value_index = 0; value_index < first_child_count; ++value_index) - float_result->set_float_no_check(pow(first_child_data[value_index], second_child_data[value_index]), value_index); + double *float_result_data = float_result->data_mutable(); + + Eidos_SIMD::pow_float64(first_child_data, second_child_data, float_result_data, first_child_count); } else if ((first_child_type == EidosValueType::kValueFloat) && (second_child_type == EidosValueType::kValueInt)) { @@ -3258,11 +3259,11 @@ EidosValue_SP EidosInterpreter::Evaluate_Exp(const EidosASTNode *p_node) else // (second_child_type == EidosValueType::kValueFloat) { const double *second_child_data = second_child_value->FloatData(); - - for (int value_index = 0; value_index < second_child_count; ++value_index) - float_result->set_float_no_check(pow(singleton_float, second_child_data[value_index]), value_index); + double *float_result_data = float_result->data_mutable(); + + Eidos_SIMD::pow_float64_scalar_base(singleton_float, second_child_data, float_result_data, second_child_count); } - + result_SP = float_result_SP; } else if (second_child_count == 1) @@ -3270,20 +3271,20 @@ EidosValue_SP EidosInterpreter::Evaluate_Exp(const EidosASTNode *p_node) double singleton_float = second_child_value->NumericAtIndex_NOCAST(0, operator_token); EidosValue_Float_SP float_result_SP = EidosValue_Float_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Float()); EidosValue_Float *float_result = float_result_SP->resize_no_initialize(first_child_count); - + if (first_child_type == EidosValueType::kValueInt) { const int64_t *first_child_data = first_child_value->IntData(); - + for (int value_index = 0; value_index < first_child_count; ++value_index) float_result->set_float_no_check(pow(first_child_data[value_index], singleton_float), value_index); } else // (first_child_type == EidosValueType::kValueFloat) { const double *first_child_data = first_child_value->FloatData(); - - for (int value_index = 0; value_index < first_child_count; ++value_index) - float_result->set_float_no_check(pow(first_child_data[value_index], singleton_float), value_index); + double *float_result_data = float_result->data_mutable(); + + Eidos_SIMD::pow_float64_scalar_exp(first_child_data, singleton_float, float_result_data, first_child_count); } result_SP = float_result_SP; diff --git a/eidos/eidos_simd.h b/eidos/eidos_simd.h index 9cd0c8af..da4bcdb9 100644 --- a/eidos/eidos_simd.h +++ b/eidos/eidos_simd.h @@ -511,6 +511,84 @@ inline void atan2_float64(const double *y, const double *x, double *output, int6 output[i] = std::atan2(y[i], x[i]); } +// --------------------- +// Power: pow(x, y) = x^y +// --------------------- +inline void pow_float64(const double *base, const double *exp, double *output, int64_t count) +{ + int64_t i = 0; + +#if EIDOS_SLEEF_AVAILABLE + for (; i + EIDOS_SLEEF_VEC_SIZE <= count; i += EIDOS_SLEEF_VEC_SIZE) + { + EIDOS_SLEEF_TYPE_D vb = EIDOS_SLEEF_LOAD_D(&base[i]); + EIDOS_SLEEF_TYPE_D ve = EIDOS_SLEEF_LOAD_D(&exp[i]); + EIDOS_SLEEF_TYPE_D r = EIDOS_SLEEF_POW_D(vb, ve); + EIDOS_SLEEF_STORE_D(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::pow(base[i], exp[i]); +} + +// Broadcast version: all elements raised to same power (base_array ^ scalar_exp) +inline void pow_float64_scalar_exp(const double *base, double exp_scalar, double *output, int64_t count) +{ + int64_t i = 0; + +#if defined(EIDOS_HAS_AVX2) && defined(EIDOS_HAS_FMA) + __m256d ve_broadcast = _mm256_set1_pd(exp_scalar); + for (; i + 4 <= count; i += 4) + { + __m256d vb = _mm256_loadu_pd(&base[i]); + __m256d r = Sleef_powd4_u10avx2(vb, ve_broadcast); + _mm256_storeu_pd(&output[i], r); + } +#elif defined(EIDOS_HAS_NEON) + float64x2_t ve_broadcast = vdupq_n_f64(exp_scalar); + for (; i + 2 <= count; i += 2) + { + float64x2_t vb = vld1q_f64(&base[i]); + float64x2_t r = Sleef_powd2_u10advsimd(vb, ve_broadcast); + vst1q_f64(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::pow(base[i], exp_scalar); +} + +// Broadcast version: scalar base raised to array of powers (scalar_base ^ exp_array) +inline void pow_float64_scalar_base(double base_scalar, const double *exp, double *output, int64_t count) +{ + int64_t i = 0; + +#if defined(EIDOS_HAS_AVX2) && defined(EIDOS_HAS_FMA) + __m256d vb_broadcast = _mm256_set1_pd(base_scalar); + for (; i + 4 <= count; i += 4) + { + __m256d ve = _mm256_loadu_pd(&exp[i]); + __m256d r = Sleef_powd4_u10avx2(vb_broadcast, ve); + _mm256_storeu_pd(&output[i], r); + } +#elif defined(EIDOS_HAS_NEON) + float64x2_t vb_broadcast = vdupq_n_f64(base_scalar); + for (; i + 2 <= count; i += 2) + { + float64x2_t ve = vld1q_f64(&exp[i]); + float64x2_t r = Sleef_powd2_u10advsimd(vb_broadcast, ve); + vst1q_f64(&output[i], r); + } +#endif + + // Scalar remainder + for (; i < count; i++) + output[i] = std::pow(base_scalar, exp[i]); +} + // ================================ // Reductions // ================================ diff --git a/eidos/eidos_test_functions_math.cpp b/eidos/eidos_test_functions_math.cpp index ca0f6552..3ecb47f8 100644 --- a/eidos/eidos_test_functions_math.cpp +++ b/eidos/eidos_test_functions_math.cpp @@ -1398,6 +1398,136 @@ void _RunSIMDMathTests(void) _TestUnarySimdFunction("log2", [](double x) { return std::log2(x); }, Eidos_SIMD::log2_float64, log_test_values, num_log_values); + + // Test pow() function (binary operation: base^exp) + // Test values for base and exponent + const double pow_base_values[] = { + 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, + 0.1, 0.2, 0.9, 1.5, 2.5, + 100.0, 0.01 + }; + const double pow_exp_values[] = { + 0.0, 0.5, 1.0, 2.0, 3.0, -1.0, -2.0, + 0.25, 0.333, 1.5, 2.5, -0.5, + 0.1, 10.0 + }; + const int num_pow_values = sizeof(pow_base_values) / sizeof(double); + + // Test pow_float64 (both arrays) + { + std::vector scalar_output(num_pow_values); + std::vector simd_output(num_pow_values); + + for (int i = 0; i < num_pow_values; i++) + scalar_output[i] = std::pow(pow_base_values[i], pow_exp_values[i]); + + Eidos_SIMD::pow_float64(pow_base_values, pow_exp_values, simd_output.data(), num_pow_values); + + bool all_match = true; + for (int i = 0; i < num_pow_values; i++) + { + double diff = std::abs(simd_output[i] - scalar_output[i]); + if (std::isnan(scalar_output[i]) && std::isnan(simd_output[i])) + continue; + if (std::isinf(scalar_output[i]) && std::isinf(simd_output[i]) && + (scalar_output[i] > 0) == (simd_output[i] > 0)) + continue; + double max_val = std::max(std::abs(scalar_output[i]), std::abs(simd_output[i])); + if (max_val > 1.0) + diff /= max_val; + if (diff > 1e-14) + { + all_match = false; + std::cerr << "SIMD pow mismatch at index " << i << ": base=" << pow_base_values[i] + << ", exp=" << pow_exp_values[i] << ", scalar=" << scalar_output[i] + << ", simd=" << simd_output[i] << ", diff=" << diff << std::endl; + } + } + + if (all_match) + gEidosTestSuccessCount++; + else + { + gEidosTestFailureCount++; + std::cerr << EIDOS_OUTPUT_FAILURE_TAG << " : SIMD pow() test failed" << std::endl; + } + } + + // Test pow_float64_scalar_exp (base_array ^ scalar_exponent) + { + const double scalar_exp = 2.5; + std::vector scalar_output(num_pow_values); + std::vector simd_output(num_pow_values); + + for (int i = 0; i < num_pow_values; i++) + scalar_output[i] = std::pow(pow_base_values[i], scalar_exp); + + Eidos_SIMD::pow_float64_scalar_exp(pow_base_values, scalar_exp, simd_output.data(), num_pow_values); + + bool all_match = true; + for (int i = 0; i < num_pow_values; i++) + { + double diff = std::abs(simd_output[i] - scalar_output[i]); + if (std::isnan(scalar_output[i]) && std::isnan(simd_output[i])) + continue; + double max_val = std::max(std::abs(scalar_output[i]), std::abs(simd_output[i])); + if (max_val > 1.0) + diff /= max_val; + if (diff > 1e-14) + { + all_match = false; + std::cerr << "SIMD pow_scalar_exp mismatch at index " << i << ": base=" << pow_base_values[i] + << ", exp=" << scalar_exp << ", scalar=" << scalar_output[i] + << ", simd=" << simd_output[i] << ", diff=" << diff << std::endl; + } + } + + if (all_match) + gEidosTestSuccessCount++; + else + { + gEidosTestFailureCount++; + std::cerr << EIDOS_OUTPUT_FAILURE_TAG << " : SIMD pow_scalar_exp() test failed" << std::endl; + } + } + + // Test pow_float64_scalar_base (scalar_base ^ exp_array) + { + const double scalar_base = 2.0; + std::vector scalar_output(num_pow_values); + std::vector simd_output(num_pow_values); + + for (int i = 0; i < num_pow_values; i++) + scalar_output[i] = std::pow(scalar_base, pow_exp_values[i]); + + Eidos_SIMD::pow_float64_scalar_base(scalar_base, pow_exp_values, simd_output.data(), num_pow_values); + + bool all_match = true; + for (int i = 0; i < num_pow_values; i++) + { + double diff = std::abs(simd_output[i] - scalar_output[i]); + if (std::isnan(scalar_output[i]) && std::isnan(simd_output[i])) + continue; + double max_val = std::max(std::abs(scalar_output[i]), std::abs(simd_output[i])); + if (max_val > 1.0) + diff /= max_val; + if (diff > 1e-14) + { + all_match = false; + std::cerr << "SIMD pow_scalar_base mismatch at index " << i << ": base=" << scalar_base + << ", exp=" << pow_exp_values[i] << ", scalar=" << scalar_output[i] + << ", simd=" << simd_output[i] << ", diff=" << diff << std::endl; + } + } + + if (all_match) + gEidosTestSuccessCount++; + else + { + gEidosTestFailureCount++; + std::cerr << EIDOS_OUTPUT_FAILURE_TAG << " : SIMD pow_scalar_base() test failed" << std::endl; + } + } } diff --git a/eidos/sleef/sleef_config.h b/eidos/sleef/sleef_config.h index fee78879..3af9df49 100644 --- a/eidos/sleef/sleef_config.h +++ b/eidos/sleef/sleef_config.h @@ -71,6 +71,9 @@ #define EIDOS_SLEEF_ATAN_D(v) Sleef_atand4_u10avx2(v) #define EIDOS_SLEEF_ATAN2_D(y,x) Sleef_atan2d4_u10avx2(y,x) + // Power function (u10 = 1.0 ULP accuracy) + #define EIDOS_SLEEF_POW_D(x,y) Sleef_powd4_u10avx2(x,y) + // Float (single-precision) support - 8 floats per AVX2 register #define EIDOS_SLEEF_FLOAT_AVAILABLE 1 #define EIDOS_SLEEF_VEC_SIZE_F 8 @@ -112,6 +115,9 @@ #define EIDOS_SLEEF_ATAN_D(v) Sleef_atand2_u10advsimd(v) #define EIDOS_SLEEF_ATAN2_D(y,x) Sleef_atan2d2_u10advsimd(y,x) + // Power function (u10 = 1.0 ULP accuracy) + #define EIDOS_SLEEF_POW_D(x,y) Sleef_powd2_u10advsimd(x,y) + // Float (single-precision) support - 4 floats per NEON register #define EIDOS_SLEEF_FLOAT_AVAILABLE 1 #define EIDOS_SLEEF_VEC_SIZE_F 4 From dc3c4a319b265129abc6d32cf6a5b2690d8f5ef7 Mon Sep 17 00:00:00 2001 From: Andrew Kern Date: Wed, 17 Dec 2025 15:38:04 -0800 Subject: [PATCH 3/6] update benchmark script --- core/spatial_map.cpp | 6 +- simd_benchmarks/sleef_benchmark.slim | 94 ++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 7 deletions(-) diff --git a/core/spatial_map.cpp b/core/spatial_map.cpp index cd1fafa1..ea831402 100644 --- a/core/spatial_map.cpp +++ b/core/spatial_map.cpp @@ -1652,7 +1652,7 @@ EidosValue_SP SpatialMap::ExecuteMethod_power(EidosGlobalStringID p_method_id, c if ((x_value->Type() == EidosValueType::kValueInt) || (x_value->Type() == EidosValueType::kValueFloat)) { double power_scalar = x_value->NumericAtIndex_NOCAST(0, nullptr); - + // FIXME: TO BE PARALLELIZED Eidos_SIMD::pow_float64_scalar_exp(values_, power_scalar, values_, values_size_); } @@ -1660,10 +1660,10 @@ EidosValue_SP SpatialMap::ExecuteMethod_power(EidosGlobalStringID p_method_id, c { SpatialMap *power_map = (SpatialMap *)x_value->ObjectElementAtIndex_NOCAST(0, nullptr); double *power_map_values = power_map->values_; - + if (!IsCompatibleWithMap(power_map)) EIDOS_TERMINATION << "ERROR (SpatialMap::ExecuteMethod_power): power() requires the target SpatialMap to be compatible with the SpatialMap supplied in x (using the same spatiality and bounds, and having the same grid resolution)." << EidosTerminate(); - + // FIXME: TO BE PARALLELIZED Eidos_SIMD::pow_float64(values_, power_map_values, values_, values_size_); } diff --git a/simd_benchmarks/sleef_benchmark.slim b/simd_benchmarks/sleef_benchmark.slim index 9e5eaf0f..432d5097 100644 --- a/simd_benchmarks/sleef_benchmark.slim +++ b/simd_benchmarks/sleef_benchmark.slim @@ -1,5 +1,5 @@ // SLEEF Transcendental Functions Benchmark -// Tests exp(), log(), log10(), log2() performance +// Tests exp(), log(), log10(), log2(), trig functions, and pow() performance initialize() { initializeSLiMModelType("nonWF"); @@ -11,7 +11,18 @@ initialize() { // Create test data x = runif(N, 0.1, 10.0); - catn("Benchmarking SLEEF transcendental functions on " + N + " elements, " + REPS + " reps each\n"); + // Data for trig functions (angles in radians) + angles = runif(N, -PI, PI); + + // Data for inverse trig (must be in [-1, 1]) + trig_input = runif(N, -0.99, 0.99); + + // Second array for atan2 and pow + x2 = runif(N, 0.1, 5.0); + + catn("Benchmarking SLEEF functions on " + N + " elements, " + REPS + " reps each\n"); + + catn("=== Exponential/Logarithmic ==="); // Benchmark exp() start = clock(); @@ -41,8 +52,83 @@ initialize() { elapsed = clock() - start; catn("log2(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); - // Also benchmark functions that already had SIMD (for comparison) - catn("\nComparison with existing SIMD functions:"); + catn("\n=== Trigonometric ==="); + + // Benchmark sin() + start = clock(); + for (i in 1:REPS) + y = sin(angles); + elapsed = clock() - start; + catn("sin(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + // Benchmark cos() + start = clock(); + for (i in 1:REPS) + y = cos(angles); + elapsed = clock() - start; + catn("cos(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + // Benchmark tan() + start = clock(); + for (i in 1:REPS) + y = tan(angles); + elapsed = clock() - start; + catn("tan(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + catn("\n=== Inverse Trigonometric ==="); + + // Benchmark asin() + start = clock(); + for (i in 1:REPS) + y = asin(trig_input); + elapsed = clock() - start; + catn("asin(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + // Benchmark acos() + start = clock(); + for (i in 1:REPS) + y = acos(trig_input); + elapsed = clock() - start; + catn("acos(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + // Benchmark atan() + start = clock(); + for (i in 1:REPS) + y = atan(x); + elapsed = clock() - start; + catn("atan(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + // Benchmark atan2() + start = clock(); + for (i in 1:REPS) + y = atan2(angles, x2); + elapsed = clock() - start; + catn("atan2(): " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + catn("\n=== Power ==="); + + // Benchmark pow (x ^ y with two arrays) + start = clock(); + for (i in 1:REPS) + y = x ^ x2; + elapsed = clock() - start; + catn("x^y: " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + // Benchmark pow with scalar exponent (x ^ 2.5) + start = clock(); + for (i in 1:REPS) + y = x ^ 2.5; + elapsed = clock() - start; + catn("x^2.5: " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + // Benchmark pow with scalar base (2.0 ^ x) + start = clock(); + for (i in 1:REPS) + y = 2.0 ^ x; + elapsed = clock() - start; + catn("2.0^x: " + format("%.3f", elapsed/REPS*1000) + " ms per call"); + + catn("\n=== Native SIMD (comparison) ==="); // Benchmark sqrt() start = clock(); From 17d0906e55fbbbc285f19251716ac9addebfe0ff Mon Sep 17 00:00:00 2001 From: Andrew Kern Date: Wed, 17 Dec 2025 16:19:50 -0800 Subject: [PATCH 4/6] Fix whitespace: add tabs to blank lines inside functions --- eidos/eidos_functions_math.cpp | 90 +++++++++++++++++----------------- eidos/eidos_interpreter.cpp | 12 ++--- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/eidos/eidos_functions_math.cpp b/eidos/eidos_functions_math.cpp index 4ae02016..1a764d75 100644 --- a/eidos/eidos_functions_math.cpp +++ b/eidos/eidos_functions_math.cpp @@ -109,16 +109,16 @@ EidosValue_SP Eidos_ExecuteFunction_abs(const std::vector &p_argu EidosValue_SP Eidos_ExecuteFunction_acos(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - + if (x_type == EidosValueType::kValueInt) { EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - + for (int value_index = 0; value_index < x_count; ++value_index) float_result->set_float_no_check(acos(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); } @@ -128,12 +128,12 @@ EidosValue_SP Eidos_ExecuteFunction_acos(const std::vector &p_arg EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); double *float_result_data = float_result->data_mutable(); result_SP = EidosValue_SP(float_result); - + Eidos_SIMD::acos_float64(float_data, float_result_data, x_count); } - + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -141,16 +141,16 @@ EidosValue_SP Eidos_ExecuteFunction_acos(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_asin(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - + if (x_type == EidosValueType::kValueInt) { EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - + for (int value_index = 0; value_index < x_count; ++value_index) float_result->set_float_no_check(asin(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); } @@ -160,12 +160,12 @@ EidosValue_SP Eidos_ExecuteFunction_asin(const std::vector &p_arg EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); double *float_result_data = float_result->data_mutable(); result_SP = EidosValue_SP(float_result); - + Eidos_SIMD::asin_float64(float_data, float_result_data, x_count); } - + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -173,16 +173,16 @@ EidosValue_SP Eidos_ExecuteFunction_asin(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_atan(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - + if (x_type == EidosValueType::kValueInt) { EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - + for (int value_index = 0; value_index < x_count; ++value_index) float_result->set_float_no_check(atan(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); } @@ -192,12 +192,12 @@ EidosValue_SP Eidos_ExecuteFunction_atan(const std::vector &p_arg EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); double *float_result_data = float_result->data_mutable(); result_SP = EidosValue_SP(float_result); - + Eidos_SIMD::atan_float64(float_data, float_result_data, x_count); } - + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -205,35 +205,35 @@ EidosValue_SP Eidos_ExecuteFunction_atan(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_atan2(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); int x_count = x_value->Count(); EidosValueType x_type = x_value->Type(); EidosValue *y_value = p_arguments[1].get(); int y_count = y_value->Count(); EidosValueType y_type = y_value->Type(); - + if (x_count != y_count) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_atan2): function atan2() requires arguments of equal length." << EidosTerminate(nullptr); - + // matrices/arrays must be conformable, and we need to decide here which operand's dimensionality will be used for the result int x_dimcount = x_value->DimensionCount(); int y_dimcount = y_value->DimensionCount(); EidosValue_SP result_dim_source(EidosValue::BinaryOperationDimensionSource(x_value, y_value)); - + if ((x_dimcount > 1) && (y_dimcount > 1) && !EidosValue::MatchingDimensions(x_value, y_value)) EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_atan2): non-conformable array operands in atan2()." << EidosTerminate(nullptr); - + EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - + // Use SIMD when both operands are float type if ((x_type == EidosValueType::kValueFloat) && (y_type == EidosValueType::kValueFloat)) { const double *x_data = x_value->FloatData(); const double *y_data = y_value->FloatData(); double *float_result_data = float_result->data_mutable(); - + Eidos_SIMD::atan2_float64(x_data, y_data, float_result_data, x_count); } else @@ -241,10 +241,10 @@ EidosValue_SP Eidos_ExecuteFunction_atan2(const std::vector &p_ar for (int value_index = 0; value_index < x_count; ++value_index) float_result->set_float_no_check(atan2(x_value->NumericAtIndex_NOCAST(value_index, nullptr), y_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); } - + // Copy dimensions from whichever operand we chose at the beginning result_SP->CopyDimensionsFromValue(result_dim_source.get()); - + return result_SP; } @@ -279,16 +279,16 @@ EidosValue_SP Eidos_ExecuteFunction_ceil(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_cos(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - + if (x_type == EidosValueType::kValueInt) { EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - + for (int value_index = 0; value_index < x_count; ++value_index) float_result->set_float_no_check(cos(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); } @@ -298,12 +298,12 @@ EidosValue_SP Eidos_ExecuteFunction_cos(const std::vector &p_argu EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); double *float_result_data = float_result->data_mutable(); result_SP = EidosValue_SP(float_result); - + Eidos_SIMD::cos_float64(float_data, float_result_data, x_count); } - + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -2497,16 +2497,16 @@ EidosValue_SP Eidos_ExecuteFunction_sign(const std::vector &p_arg EidosValue_SP Eidos_ExecuteFunction_sin(const std::vector &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - + if (x_type == EidosValueType::kValueInt) { EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - + for (int value_index = 0; value_index < x_count; ++value_index) float_result->set_float_no_check(sin(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); } @@ -2516,12 +2516,12 @@ EidosValue_SP Eidos_ExecuteFunction_sin(const std::vector &p_argu EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); double *float_result_data = float_result->data_mutable(); result_SP = EidosValue_SP(float_result); - + Eidos_SIMD::sin_float64(float_data, float_result_data, x_count); } - + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } @@ -2701,16 +2701,16 @@ EidosValue_SP Eidos_ExecuteFunction_sumExact(const std::vector &p EidosValue_SP Eidos_ExecuteFunction_tan(const std::vector &p_arguments, EidosInterpreter __attribute__((unused)) &p_interpreter) { EidosValue_SP result_SP(nullptr); - + EidosValue *x_value = p_arguments[0].get(); EidosValueType x_type = x_value->Type(); int x_count = x_value->Count(); - + if (x_type == EidosValueType::kValueInt) { EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); result_SP = EidosValue_SP(float_result); - + for (int value_index = 0; value_index < x_count; ++value_index) float_result->set_float_no_check(tan(x_value->NumericAtIndex_NOCAST(value_index, nullptr)), value_index); } @@ -2720,12 +2720,12 @@ EidosValue_SP Eidos_ExecuteFunction_tan(const std::vector &p_argu EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count); double *float_result_data = float_result->data_mutable(); result_SP = EidosValue_SP(float_result); - + Eidos_SIMD::tan_float64(float_data, float_result_data, x_count); } - + result_SP->CopyDimensionsFromValue(x_value); - + return result_SP; } diff --git a/eidos/eidos_interpreter.cpp b/eidos/eidos_interpreter.cpp index ae69e590..c444640e 100644 --- a/eidos/eidos_interpreter.cpp +++ b/eidos/eidos_interpreter.cpp @@ -3213,7 +3213,7 @@ EidosValue_SP EidosInterpreter::Evaluate_Exp(const EidosASTNode *p_node) const double *first_child_data = first_child_value->FloatData(); const double *second_child_data = second_child_value->FloatData(); double *float_result_data = float_result->data_mutable(); - + Eidos_SIMD::pow_float64(first_child_data, second_child_data, float_result_data, first_child_count); } else if ((first_child_type == EidosValueType::kValueFloat) && (second_child_type == EidosValueType::kValueInt)) @@ -3260,10 +3260,10 @@ EidosValue_SP EidosInterpreter::Evaluate_Exp(const EidosASTNode *p_node) { const double *second_child_data = second_child_value->FloatData(); double *float_result_data = float_result->data_mutable(); - + Eidos_SIMD::pow_float64_scalar_base(singleton_float, second_child_data, float_result_data, second_child_count); } - + result_SP = float_result_SP; } else if (second_child_count == 1) @@ -3271,11 +3271,11 @@ EidosValue_SP EidosInterpreter::Evaluate_Exp(const EidosASTNode *p_node) double singleton_float = second_child_value->NumericAtIndex_NOCAST(0, operator_token); EidosValue_Float_SP float_result_SP = EidosValue_Float_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Float()); EidosValue_Float *float_result = float_result_SP->resize_no_initialize(first_child_count); - + if (first_child_type == EidosValueType::kValueInt) { const int64_t *first_child_data = first_child_value->IntData(); - + for (int value_index = 0; value_index < first_child_count; ++value_index) float_result->set_float_no_check(pow(first_child_data[value_index], singleton_float), value_index); } @@ -3283,7 +3283,7 @@ EidosValue_SP EidosInterpreter::Evaluate_Exp(const EidosASTNode *p_node) { const double *first_child_data = first_child_value->FloatData(); double *float_result_data = float_result->data_mutable(); - + Eidos_SIMD::pow_float64_scalar_exp(first_child_data, singleton_float, float_result_data, first_child_count); } From dad94b7efce70279e813ca84454341fd4e945dae Mon Sep 17 00:00:00 2001 From: Andrew Kern Date: Wed, 17 Dec 2025 16:46:12 -0800 Subject: [PATCH 5/6] forgot to alter tests to use allClose --- core/slim_test_other.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/core/slim_test_other.cpp b/core/slim_test_other.cpp index f1b47cc6..1e43c9a3 100644 --- a/core/slim_test_other.cpp +++ b/core/slim_test_other.cpp @@ -1668,9 +1668,9 @@ void _RunSpatialMapTests(void) SLiMAssertScriptStop(prefix_1D + "m1.divide(mv2); if (identical(mv1 / mv2, m1.gridValues())) stop(); } "); SLiMAssertScriptStop(prefix_1D + "m1.divide(m2); if (identical(mv1 / mv2, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_1D + "m1.power(0.25); if (identical(mv1 ^ 0.25, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_1D + "m1.power(mv2); if (identical(mv1 ^ mv2, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_1D + "m1.power(m2); if (identical(mv1 ^ mv2, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_1D + "m1.power(0.25); if (allClose(mv1 ^ 0.25, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_1D + "m1.power(mv2); if (allClose(mv1 ^ mv2, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_1D + "m1.power(m2); if (allClose(mv1 ^ mv2, m1.gridValues())) stop(); } "); SLiMAssertScriptStop(prefix_1D + "m1.exp(); if (allClose(exp(mv1), m1.gridValues())) stop(); } "); @@ -1783,9 +1783,9 @@ void _RunSpatialMapTests(void) SLiMAssertScriptStop(prefix_2D + "m1.divide(mv2); if (identical(mv1 / mv2, m1.gridValues())) stop(); } "); SLiMAssertScriptStop(prefix_2D + "m1.divide(m2); if (identical(mv1 / mv2, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_2D + "m1.power(0.25); if (identical(mv1 ^ 0.25, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_2D + "m1.power(mv2); if (identical(mv1 ^ mv2, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_2D + "m1.power(m2); if (identical(mv1 ^ mv2, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_2D + "m1.power(0.25); if (allClose(mv1 ^ 0.25, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_2D + "m1.power(mv2); if (allClose(mv1 ^ mv2, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_2D + "m1.power(m2); if (allClose(mv1 ^ mv2, m1.gridValues())) stop(); } "); SLiMAssertScriptStop(prefix_2D + "m1.exp(); if (allClose(exp(mv1), m1.gridValues())) stop(); } "); @@ -1899,9 +1899,9 @@ void _RunSpatialMapTests(void) SLiMAssertScriptStop(prefix_3D + "m1.divide(mv2); if (identical(mv1 / mv2, m1.gridValues())) stop(); } "); SLiMAssertScriptStop(prefix_3D + "m1.divide(m2); if (identical(mv1 / mv2, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_3D + "m1.power(0.25); if (identical(mv1 ^ 0.25, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_3D + "m1.power(mv2); if (identical(mv1 ^ mv2, m1.gridValues())) stop(); } "); - SLiMAssertScriptStop(prefix_3D + "m1.power(m2); if (identical(mv1 ^ mv2, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_3D + "m1.power(0.25); if (allClose(mv1 ^ 0.25, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_3D + "m1.power(mv2); if (allClose(mv1 ^ mv2, m1.gridValues())) stop(); } "); + SLiMAssertScriptStop(prefix_3D + "m1.power(m2); if (allClose(mv1 ^ mv2, m1.gridValues())) stop(); } "); SLiMAssertScriptStop(prefix_3D + "m1.exp(); if (allClose(exp(mv1), m1.gridValues())) stop(); } "); From 9d162453849a87b32a21ee1978bbd889bd6951fd Mon Sep 17 00:00:00 2001 From: Andrew Kern Date: Wed, 17 Dec 2025 17:47:00 -0800 Subject: [PATCH 6/6] Update VERSIONS for new SIMD functions --- VERSIONS | 1 + 1 file changed, 1 insertion(+) diff --git a/VERSIONS b/VERSIONS index a61461ca..d76f30bc 100644 --- a/VERSIONS +++ b/VERSIONS @@ -28,6 +28,7 @@ development head (in the master branch): SIMD optimizations for spatial interaction strength calculations, thanks to Andy Kern, https://github.com/MesserLab/SLiM/pull/590 fix #593, Student's T distribution had a sign error in tdist(), thanks to Andy Kern, https://github.com/MesserLab/SLiM/issues/591 add comprehensive tests for spatial interaction kernel calculations; includes SIMD vs scalar consistency tests, C++ level tests, and script-level tests, thanks to Andy Kern, https://github.com/MesserLab/SLiM/pull/592 + add SIMD acceleration for trig functions (sin(), cos(), tan(), asin(), acos(), atan(), atan2()), power functions (^ opperator, pow, SpatialMap.pow()), thanks to Andy Kern, https://github.com/MesserLab/SLiM/pull/595 version 5.1 (Eidos version 4.1):