diff --git a/stl/inc/algorithm b/stl/inc/algorithm index b9ff3c08e5..d41ac08738 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -64,9 +64,9 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8( _Min_max_element_t __stdcall __std_minmax_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; _Min_max_element_t __stdcall __std_minmax_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; _Min_max_element_t __stdcall __std_minmax_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; -#if !defined(_M_ARM64) && !defined(_M_ARM64EC) +#if _VECTORIZED_MINMAX_ELEMENT_64BIT_INT _Min_max_element_t __stdcall __std_minmax_element_8(const void* _First, const void* _Last, bool _Signed) noexcept; -#endif // ^^^ !defined(_M_ARM64) && !defined(_M_ARM64EC) ^^^ +#endif // ^^^ _VECTORIZED_MINMAX_ELEMENT_64BIT_INT ^^^ _Min_max_element_t __stdcall __std_minmax_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; _Min_max_element_t __stdcall __std_minmax_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; #endif // ^^^ _VECTORIZED_MINMAX_ELEMENT ^^^ @@ -225,11 +225,11 @@ pair<_Ty*, _Ty*> _Minmax_element_vectorized(_Ty* const _First, _Ty* const _Last) } else if constexpr (sizeof(_Ty) == 4) { _Res = ::__std_minmax_element_4(_First, _Last, _Signed); } else if constexpr (sizeof(_Ty) == 8) { -#if defined(_M_ARM64) || defined(_M_ARM64EC) - static_assert(false, "unexpected size; 64-bit integers on ARM64/ARM64EC should not take this codepath"); -#else // ^^^ defined(_M_ARM64) || defined(_M_ARM64EC) / !defined(_M_ARM64) && !defined(_M_ARM64EC) vvv +#if _VECTORIZED_MINMAX_ELEMENT_64BIT_INT _Res = ::__std_minmax_element_8(_First, _Last, _Signed); -#endif // ^^^ !defined(_M_ARM64) && !defined(_M_ARM64EC) ^^^ +#else // ^^^ _VECTORIZED_MINMAX_ELEMENT_64BIT_INT / !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT vvv + static_assert(false, "unexpected size; 64-bit integers on ARM64/ARM64EC 
should not take this codepath"); +#endif // ^^^ !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT ^^^ } else { static_assert(false, "unexpected size"); } diff --git a/stl/inc/xutility b/stl/inc/xutility index 2d5bd7203c..ba9db44c1e 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -61,6 +61,9 @@ _STL_DISABLE_CLANG_WARNINGS #if !_USE_STD_VECTOR_ALGORITHMS #define _VECTORIZED_FOR_X64_X86 0 #define _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC 0 +#elif defined(_M_ARM64EC) && defined(_ENABLE_STL_INTERNAL_CHECK) +#define _VECTORIZED_FOR_X64_X86 1 +#define _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC 1 #elif defined(_M_ARM64) || defined(_M_ARM64EC) #define _VECTORIZED_FOR_X64_X86 0 #define _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC 1 @@ -71,32 +74,33 @@ _STL_DISABLE_CLANG_WARNINGS #error Unknown architecture #endif // ^^^ unknown architecture ^^^ -#define _VECTORIZED_ADJACENT_FIND _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_BITSET_FROM_STRING _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_BITSET_TO_STRING _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_COUNT _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_FIND _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_FIND_END _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_FIND_FIRST_OF _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_FIND_LAST _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_FIND_LAST_OF _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_INCLUDES _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_IS_SORTED_UNTIL _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_MINMAX _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_MINMAX_ELEMENT _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_MISMATCH _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_REMOVE _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_REMOVE_COPY _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_REPLACE _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_REPLACE_COPY _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_REVERSE 
_VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_REVERSE_COPY _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_ROTATE _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_SEARCH _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_SEARCH_N _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_SWAP_RANGES _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC -#define _VECTORIZED_UNIQUE _VECTORIZED_FOR_X64_X86 -#define _VECTORIZED_UNIQUE_COPY _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_ADJACENT_FIND _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_BITSET_FROM_STRING _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_BITSET_TO_STRING _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_COUNT _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_FIND _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_FIND_END _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_FIND_FIRST_OF _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_FIND_LAST _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_FIND_LAST_OF _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_INCLUDES _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_IS_SORTED_UNTIL _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_MINMAX _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_MINMAX_ELEMENT _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_MINMAX_ELEMENT_64BIT_INT _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_MISMATCH _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_REMOVE _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_REMOVE_COPY _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_REPLACE _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_REPLACE_COPY _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_REVERSE _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_REVERSE_COPY _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_ROTATE _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_SEARCH _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_SEARCH_N _VECTORIZED_FOR_X64_X86 +#define 
_VECTORIZED_SWAP_RANGES _VECTORIZED_FOR_X64_X86_ARM64_ARM64EC +#define _VECTORIZED_UNIQUE _VECTORIZED_FOR_X64_X86 +#define _VECTORIZED_UNIQUE_COPY _VECTORIZED_FOR_X64_X86 #ifndef _USE_STD_VECTOR_FLOATING_ALGORITHMS #if _USE_STD_VECTOR_ALGORITHMS && !defined(_M_FP_EXCEPT) @@ -193,18 +197,18 @@ const void* __stdcall __std_find_end_8( const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; -#if !defined(_M_ARM64) && !defined(_M_ARM64EC) +#if _VECTORIZED_MINMAX_ELEMENT_64BIT_INT const void* __stdcall __std_min_element_8(const void* _First, const void* _Last, bool _Signed) noexcept; -#endif // ^^^ !defined(_M_ARM64) && !defined(_M_ARM64EC) ^^^ +#endif // ^^^ _VECTORIZED_MINMAX_ELEMENT_64BIT_INT ^^^ const void* __stdcall __std_min_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; const void* __stdcall __std_min_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; const void* __stdcall __std_max_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_max_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_max_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; -#if !defined(_M_ARM64) && !defined(_M_ARM64EC) +#if _VECTORIZED_MINMAX_ELEMENT_64BIT_INT const void* __stdcall __std_max_element_8(const void* _First, const void* _Last, bool _Signed) noexcept; -#endif // ^^^ !defined(_M_ARM64) && !defined(_M_ARM64EC) ^^^ +#endif // ^^^ _VECTORIZED_MINMAX_ELEMENT_64BIT_INT ^^^ const void* __stdcall __std_max_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; const void* __stdcall __std_max_element_d(const void* _First, const void* _Last, bool _Unused) 
noexcept; #endif // ^^^ _VECTORIZED_MINMAX_ELEMENT ^^^ @@ -425,11 +429,11 @@ _Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { } else if constexpr (sizeof(_Ty) == 4) { return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_4(_First, _Last, _Signed))); } else if constexpr (sizeof(_Ty) == 8) { -#if defined(_M_ARM64) || defined(_M_ARM64EC) - static_assert(false, "unexpected size; 64-bit integers on ARM64/ARM64EC should not take this codepath"); -#else // ^^^ defined(_M_ARM64) || defined(_M_ARM64EC) / !defined(_M_ARM64) && !defined(_M_ARM64EC) vvv +#if _VECTORIZED_MINMAX_ELEMENT_64BIT_INT return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_8(_First, _Last, _Signed))); -#endif // ^^^ !defined(_M_ARM64) && !defined(_M_ARM64EC) ^^^ +#else // ^^^ _VECTORIZED_MINMAX_ELEMENT_64BIT_INT / !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT vvv + static_assert(false, "unexpected size; 64-bit integers on ARM64/ARM64EC should not take this codepath"); +#endif // ^^^ !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT ^^^ } else { static_assert(false, "unexpected size"); } @@ -450,11 +454,11 @@ _Ty* _Max_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { } else if constexpr (sizeof(_Ty) == 4) { return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_4(_First, _Last, _Signed))); } else if constexpr (sizeof(_Ty) == 8) { -#if defined(_M_ARM64) || defined(_M_ARM64EC) - static_assert(false, "unexpected size; 64-bit integers on ARM64/ARM64EC should not take this codepath"); -#else // ^^^ defined(_M_ARM64) || defined(_M_ARM64EC) / !defined(_M_ARM64) && !defined(_M_ARM64EC) vvv +#if _VECTORIZED_MINMAX_ELEMENT_64BIT_INT return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_8(_First, _Last, _Signed))); -#endif // ^^^ !defined(_M_ARM64) && !defined(_M_ARM64EC) ^^^ +#else // ^^^ _VECTORIZED_MINMAX_ELEMENT_64BIT_INT / !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT vvv + static_assert(false, "unexpected size; 64-bit integers on ARM64/ARM64EC should not take this codepath"); +#endif 
// ^^^ !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT ^^^ } else { static_assert(false, "unexpected size"); } @@ -7304,15 +7308,15 @@ constexpr bool _Is_predicate_greater = _Is_any_of_v<_Pr, #endif // _HAS_CXX20 greater<>, greater<_Iter_value_t<_Iter>>>; -#if defined(_M_ARM64) || defined(_M_ARM64EC) +#if _VECTORIZED_MINMAX_ELEMENT_64BIT_INT +template <class _Ty> +_INLINE_VAR constexpr bool _Is_64bit_int_on_arm64_arm64ec = false; +#else // ^^^ _VECTORIZED_MINMAX_ELEMENT_64BIT_INT / !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT vvv // We choose not to vectorize minmax_element for 64-bit integers on ARM64/ARM64EC // as this does not improve performance over the scalar code. template <class _Ty> _INLINE_VAR constexpr bool _Is_64bit_int_on_arm64_arm64ec = sizeof(_Ty) == 8 && !is_floating_point_v<_Ty>; -#else // ^^^ defined(_M_ARM64) || defined(_M_ARM64EC) / !defined(_M_ARM64) && !defined(_M_ARM64EC) vvv -template <class _Ty> -_INLINE_VAR constexpr bool _Is_64bit_int_on_arm64_arm64ec = false; -#endif // ^^^ !defined(_M_ARM64) && !defined(_M_ARM64EC) ^^^ +#endif // ^^^ !_VECTORIZED_MINMAX_ELEMENT_64BIT_INT ^^^ template constexpr bool _Is_min_max_optimization_safe = // Activate the vector algorithms for min_/max_element?