diff --git a/benchmarks/src/integer_to_string.cpp b/benchmarks/src/integer_to_string.cpp index 8466545ba2..74ff03ff99 100644 --- a/benchmarks/src/integer_to_string.cpp +++ b/benchmarks/src/integer_to_string.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,26 @@ auto generate_array() { return a; } +template +void integer_to_chars(benchmark::State& state) { + auto a = generate_array(); + char d[20]; + + auto it = a.begin(); + for (auto _ : state) { + auto i = *it; + benchmark::DoNotOptimize(i); + auto s = to_chars(begin(d), end(d), i); + benchmark::DoNotOptimize(s.ec); + benchmark::DoNotOptimize(s.ptr); + + ++it; + if (it == a.end()) { + it = a.begin(); + } + } +} + template void internal_integer_to_buff(benchmark::State& state) { auto a = generate_array(); @@ -77,6 +98,11 @@ void integer_to_string(benchmark::State& state) { } } +BENCHMARK(integer_to_chars); +BENCHMARK(integer_to_chars); +BENCHMARK(integer_to_chars); +BENCHMARK(integer_to_chars); + BENCHMARK(internal_integer_to_buff); BENCHMARK(internal_integer_to_buff); BENCHMARK(internal_integer_to_buff); diff --git a/stl/inc/charconv b/stl/inc/charconv index 84b87511b6..e936921c32 100644 --- a/stl/inc/charconv +++ b/stl/inc/charconv @@ -64,31 +64,64 @@ _NODISCARD _CONSTEXPR23 to_chars_result _Integer_to_chars( switch (_Base) { case 10: { // Derived from _UIntegral_to_buff() - // Performance note: Ryu's digit table should be faster here. - constexpr bool _Use_chunks = sizeof(_Unsigned) > sizeof(size_t); - - if constexpr (_Use_chunks) { // For 64-bit numbers on 32-bit platforms, work in chunks to avoid 64-bit - // divisions. - while (_Value > 0xFFFF'FFFFU) { - // Performance note: Ryu's division workaround would be faster here. 
- unsigned long _Chunk = static_cast(_Value % 1'000'000'000); - _Value = static_cast<_Unsigned>(_Value / 1'000'000'000); - - for (int _Idx = 0; _Idx != 9; ++_Idx) { - *--_RNext = static_cast('0' + _Chunk % 10); - _Chunk /= 10; +#ifdef _WIN64 + auto _Trunc = _Value; +#else // ^^^ defined(_WIN64) / !defined(_WIN64) vvv + + if constexpr (sizeof(_Unsigned) > 4) { // For 64-bit numbers on 32-bit platforms, work in chunks to avoid 64-bit divisions. + while (_Value > 0xFFFFFFFFU) { + auto _Value_chunk = static_cast(_Value % 100000000); + _Value /= 100000000; + + for (int _Idx = 0; _Idx != 3; ++_Idx) { + const unsigned long _Value_chunk_part = _Value_chunk % 100; + _Value_chunk /= 100; + _RNext -= 2; + if (!_STD _Is_constant_evaluated()) { + _CSTD memcpy(_RNext, _Digit_pairs._Data[_Value_chunk_part], 2); + } else { + _RNext[0] = _Digit_pairs._Data[_Value_chunk_part][0]; + _RNext[1] = _Digit_pairs._Data[_Value_chunk_part][1]; + } + } + + _RNext -= 2; + if (!_STD _Is_constant_evaluated()) { + _CSTD memcpy(_RNext, _Digit_pairs._Data[_Value_chunk], 2); + } else { + _RNext[0] = _Digit_pairs._Data[_Value_chunk][0]; + _RNext[1] = _Digit_pairs._Data[_Value_chunk][1]; } } } - using _Truncated = conditional_t<_Use_chunks, unsigned long, _Unsigned>; + auto _Trunc = static_cast(_Value); +#endif // ^^^ !defined(_WIN64) ^^^ - _Truncated _Trunc = static_cast<_Truncated>(_Value); + // If we have a single digit, print [0, 9] and return. (This is necessary to correctly handle 0.) + if (_Trunc < 10) { + *--_RNext = static_cast('0' + _Trunc); + break; + } + // Print one or more pairs of digits. 
do { - *--_RNext = static_cast('0' + _Trunc % 10); - _Trunc /= 10; - } while (_Trunc != 0); + const auto _Trunc_part = static_cast(_Trunc % 100); + _Trunc /= 100; + _RNext -= 2; + if (!_STD _Is_constant_evaluated()) { + _CSTD memcpy(_RNext, _Digit_pairs._Data[_Trunc_part], 2); + } else { + _RNext[0] = _Digit_pairs._Data[_Trunc_part][0]; + _RNext[1] = _Digit_pairs._Data[_Trunc_part][1]; + } + } while (_Trunc >= 10); + + // If we have an unpaired digit, print it. + // For example, 1729 is printed as 17 29, and 19937 is printed as 1 99 37. + if (_Trunc != 0) { + *--_RNext = static_cast('0' + _Trunc); + } break; } diff --git a/stl/inc/xmemory b/stl/inc/xmemory index 4bdb8fed77..6ee39ff8e2 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -2775,24 +2775,6 @@ template constexpr bool _Has_guaranteed_append_range = false; // N5032 [sequence.reqmts]/112; used by flat_(multi)set::insert. #endif // _HAS_CXX23 -template -struct _Digit_pair_table { - _Elem _Data[100][2]; - - constexpr explicit _Digit_pair_table() : _Data{} { - for (int _Idx = 0; _Idx != 100; ++_Idx) { - _Data[_Idx][0] = static_cast<_Elem>('0' + _Idx / 10); - _Data[_Idx][1] = static_cast<_Elem>('0' + _Idx % 10); - } - } - - _Digit_pair_table(const _Digit_pair_table&) = delete; - _Digit_pair_table& operator=(const _Digit_pair_table&) = delete; -}; - -template -constexpr _Digit_pair_table<_Elem> _Digit_pairs{}; - template _NODISCARD _Elem* _UIntegral_to_buff(_Elem* _RNext, _UTy _UVal) { // used by both to_string and thread::id output // format _UVal into buffer *ending at* _RNext @@ -2830,7 +2812,7 @@ _NODISCARD _Elem* _UIntegral_to_buff(_Elem* _RNext, _UTy _UVal) { // used by bot // Print one or more pairs of digits. 
do { - const unsigned long _UVal_trunc_part = _UVal_trunc % 100; + const auto _UVal_trunc_part = static_cast(_UVal_trunc % 100); _UVal_trunc /= 100; _RNext -= 2; _CSTD memcpy(_RNext, _Digit_pairs<_Elem>._Data[_UVal_trunc_part], 2 * sizeof(_Elem)); diff --git a/stl/inc/xutility b/stl/inc/xutility index e01eb99d15..609312659e 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -7975,6 +7975,24 @@ constexpr bool _Equivalence_is_equality = _Is_any_of_v<_Compare, _RANGES less, _RANGES greater> || (_Is_any_of_v<_Compare, less<>, less<_Key>, greater<>, greater<_Key>> && _Equivalence_is_equality_impl<_Key>); #endif // _HAS_CXX23 + +template +struct _Digit_pair_table { + _Elem _Data[100][2]; + + constexpr explicit _Digit_pair_table() : _Data{} { + for (int _Idx = 0; _Idx != 100; ++_Idx) { + _Data[_Idx][0] = static_cast<_Elem>('0' + _Idx / 10); + _Data[_Idx][1] = static_cast<_Elem>('0' + _Idx % 10); + } + } + + _Digit_pair_table(const _Digit_pair_table&) = delete; + _Digit_pair_table& operator=(const _Digit_pair_table&) = delete; +}; + +template +constexpr _Digit_pair_table<_Elem> _Digit_pairs{}; _STD_END // TRANSITION, non-_Ugly attribute tokens