From a54137c88061c7495728fc6b8dfd0474e812b2cb Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Fri, 15 Apr 2022 09:34:09 -0400 Subject: [PATCH] libstdc++: Optimize integer std::from_chars This applies the following optimizations to the integer std::from_chars implementation: 1. Use a lookup table for converting an alphanumeric digit to its base-36 value instead of using a range test (for 0-9) and switch (for a-z and A-Z). The table is constructed using a C++14 constexpr function which doesn't assume a particular character encoding or __CHAR_BIT__ value. This new conversion function __from_chars_alnum_to_val is templated on whether we care only about the decimal digits, in which case we can perform the conversion with a single subtraction since the digit characters are guaranteed to be contiguous (unlike the letters). 2. Generalize __from_chars_binary to handle all power-of-two bases. This function (now named __from_chars_pow2_base) is also templated on whether we care only about the decimal digits for the benefit of faster digit conversion for base 2, 4 and 8. 3. In __from_chars_digit, use static_cast(__c - '0') < __base instead of '0' <= __c && __c <= ('0' + (__base - 1)). as the digit recognition test (exhaustively verified that the two tests are equivalent). 4. In __from_chars_alnum, use a nested loop to consume the rest of the digits in the overflow case (mirroring __from_chars_digit) so that the main loop doesn't have to maintain the overflow flag __valid. At this point, __from_chars_digit is nearly identical to __from_chars_alnum, so this patch merges the two functions by removing the former and templatizing the latter according to whether we care only about the decimal digits. Finally, 5. In __from_chars_alnum, maintain a lower bound on the number of unused bits in the result and use it to omit the overflow check when it's safe to do so. In passing, this patch replaces the non-portable function ascii_to_hexit used by __floating_from_chars_hex with the new conversion function. Some runtime measurements for a simple 15-line benchmark that roundtrips printing/parsing 200 million integers via std::to/from_chars (average of 5 runs): Base Before After (seconds, lower is better) 2 9.37 9.37 3 15.79 12.13 8 4.15 3.67 10 4.90 3.86 11 6.84 5.03 16 4.14 2.93 32 3.85 2.39 36 5.22 3.26 libstdc++-v3/ChangeLog: * include/std/charconv (__from_chars_alnum_to_val_table): Define. (__from_chars_alnum_to_val): Define. (__from_chars_binary): Rename to ... (__from_chars_pow2_base): ... this. Generalize to handle any power-of-two base using __from_chars_alnum_to_val. (__from_chars_digit): Optimize digit recognition to a single test instead of two tests. Use [[__unlikely___]] attribute. (__from_chars_alpha_to_num): Remove. (__from_chars_alnum): Use __from_chars_alnum_to_val. Use a nested loop for the overflow case. Maintain a lower bound on the number of available bits in the result and use it to omit the overflow check. (from_chars): Adjust appropriately. * src/c++17/floating_from_chars.cc (ascii_to_hexit): Remove. (__floating_from_chars_hex): Use __from_chars_alnum_to_val to recognize a hex digit instead. --- libstdc++-v3/include/std/charconv | 255 +++++++----------- libstdc++-v3/src/c++17/floating_from_chars.cc | 18 +- 2 files changed, 106 insertions(+), 167 deletions(-) diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std/charconv index 2ce9c7d4cb9..dda4ec87779 100644 --- a/libstdc++-v3/include/std/charconv +++ b/libstdc++-v3/include/std/charconv @@ -407,176 +407,124 @@ namespace __detail return true; } - /// std::from_chars implementation for integers in base 2. - template + // Construct and return a lookup table that maps 0-9, A-Z and a-z to their + // corresponding base-36 value and maps all other characters to 127. + constexpr auto + __from_chars_alnum_to_val_table() + { + constexpr unsigned char __lower_letters[] + = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', + 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', 'z' }; + constexpr unsigned char __upper_letters[] + = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z' }; + struct { unsigned char __data[1u << __CHAR_BIT__] = {}; } __table; + for (auto& __entry : __table.__data) + __entry = 127; + for (int __i = 0; __i < 10; ++__i) + __table.__data['0' + __i] = __i; + for (int __i = 0; __i < 26; ++__i) + { + __table.__data[__lower_letters[__i]] = 10 + __i; + __table.__data[__upper_letters[__i]] = 10 + __i; + } + return __table; + } + + // If _DecOnly is true: if the character is a decimal digit, then + // return its corresponding base-10 value, otherwise return a value >= 127. + // If _DecOnly is false: if the character is an alphanumeric digit, then + // return its corresponding base-36 value, otherwise return a value >= 127. + template + unsigned char + __from_chars_alnum_to_val(unsigned char __c) + { + if _GLIBCXX17_CONSTEXPR (_DecOnly) + return __c - '0'; + else + { + static constexpr auto __table = __from_chars_alnum_to_val_table(); + return __table.__data[__c]; + } + } + + /// std::from_chars implementation for integers in a power-of-two base. + /// If _DecOnly is true, then we may assume __base is at most 8. + template bool - __from_chars_binary(const char*& __first, const char* __last, _Tp& __val) + __from_chars_pow2_base(const char*& __first, const char* __last, _Tp& __val, + int __base) { static_assert(is_integral<_Tp>::value, "implementation bug"); static_assert(is_unsigned<_Tp>::value, "implementation bug"); + // __glibcxx_assert((__base & (__base - 1)) == 0); + // __glibcxx_assert(_DecOnly ? __base <= 8 : __base <= 32); + const int __log2_base = __countr_zero(__base); + const ptrdiff_t __len = __last - __first; ptrdiff_t __i = 0; while (__i < __len && __first[__i] == '0') ++__i; const ptrdiff_t __leading_zeroes = __i; - while (__i < __len) + unsigned char __leading_c = 0; + for (; __i < __len; ++__i) { - const unsigned char __c = (unsigned)__first[__i] - '0'; - if (__c < 2) - __val = (__val << 1) | __c; - else + const unsigned char __c = __from_chars_alnum_to_val<_DecOnly>(__first[__i]); + if (__c >= __base) break; - __i++; + __val = (__val << __log2_base) | __c; + + if (__i == __leading_zeroes) + { + // At the first iteration, remember the leading significant digit. + // __glibcxx_assert(__leading_c == 0 && __c != 0); + __leading_c = __c; + } } __first += __i; - return (__i - __leading_zeroes) <= __gnu_cxx::__int_traits<_Tp>::__digits; + auto __significant_bits = (__i - __leading_zeroes) * __log2_base; + if (__base != 2 && __leading_c != 0) + // Compensate for a leading significant digit that didn't use all + // of its available bits. + __significant_bits -= __log2_base - __bit_width(__leading_c); + // __glibcxx_assert(__significant_bits >= 0); + return __significant_bits <= __gnu_cxx::__int_traits<_Tp>::__digits; } - /// std::from_chars implementation for integers in bases 3 to 10. - template - bool - __from_chars_digit(const char*& __first, const char* __last, _Tp& __val, - int __base) - { - static_assert(is_integral<_Tp>::value, "implementation bug"); - static_assert(is_unsigned<_Tp>::value, "implementation bug"); - - auto __matches = [__base](char __c) { - return '0' <= __c && __c <= ('0' + (__base - 1)); - }; - - while (__first != __last) - { - const char __c = *__first; - if (__matches(__c)) - { - if (!__raise_and_add(__val, __base, __c - '0')) - { - while (++__first != __last && __matches(*__first)) - ; - return false; - } - __first++; - } - else - return true; - } - return true; - } - - constexpr char - __from_chars_alpha_to_num(char __c) - { - switch (__c) - { - case 'a': - case 'A': - return 10; - case 'b': - case 'B': - return 11; - case 'c': - case 'C': - return 12; - case 'd': - case 'D': - return 13; - case 'e': - case 'E': - return 14; - case 'f': - case 'F': - return 15; - case 'g': - case 'G': - return 16; - case 'h': - case 'H': - return 17; - case 'i': - case 'I': - return 18; - case 'j': - case 'J': - return 19; - case 'k': - case 'K': - return 20; - case 'l': - case 'L': - return 21; - case 'm': - case 'M': - return 22; - case 'n': - case 'N': - return 23; - case 'o': - case 'O': - return 24; - case 'p': - case 'P': - return 25; - case 'q': - case 'Q': - return 26; - case 'r': - case 'R': - return 27; - case 's': - case 'S': - return 28; - case 't': - case 'T': - return 29; - case 'u': - case 'U': - return 30; - case 'v': - case 'V': - return 31; - case 'w': - case 'W': - return 32; - case 'x': - case 'X': - return 33; - case 'y': - case 'Y': - return 34; - case 'z': - case 'Z': - return 35; - } - return 127; - } - - /// std::from_chars implementation for integers in bases 11 to 36. - template + /// std::from_chars implementation for integers in any base. + /// If _DecOnly is true, then we may assume __base is at most 10. + template bool __from_chars_alnum(const char*& __first, const char* __last, _Tp& __val, int __base) { - bool __valid = true; - while (__first != __last) - { - char __c = *__first; - if ('0' <= __c && __c <= '9') // isdigit - __c -= '0'; - else - { - __c = __from_chars_alpha_to_num(__c); - if (__c >= __base) - break; - } + // __glibcxx_assert(!_DecOnly || __base <= 10); - if (__builtin_expect(__valid, 1)) - __valid = __raise_and_add(__val, __base, __c); - __first++; + const int __bits_per_digit = __bit_width(__base); + int __unused_bits_lower_bound = __gnu_cxx::__int_traits<_Tp>::__digits; + for (; __first != __last; ++__first) + { + const unsigned char __c = __from_chars_alnum_to_val<_DecOnly>(*__first); + if (__c >= __base) + return true; + + __unused_bits_lower_bound -= __bits_per_digit; + if (__unused_bits_lower_bound >= 0) [[__likely__]] + // We're definitely not going to overflow. + __val = __val * __base + __c; + else if (!__raise_and_add(__val, __base, __c)) [[__unlikely__]] + { + while (++__first != __last + && __from_chars_alnum_to_val<_DecOnly>(*__first) < __base) + ; + return false; + } } - return __valid; + return true; } template @@ -611,12 +559,17 @@ namespace __detail const auto __start = __first; bool __valid; - if (__base == 2) - __valid = __detail::__from_chars_binary(__first, __last, __val); + if ((__base & (__base - 1)) == 0) + { + if (__base <= 8) + __valid = __detail::__from_chars_pow2_base(__first, __last, __val, __base); + else + __valid = __detail::__from_chars_pow2_base(__first, __last, __val, __base); + } else if (__base <= 10) - __valid = __detail::__from_chars_digit(__first, __last, __val, __base); + __valid = __detail::__from_chars_alnum(__first, __last, __val, __base); else - __valid = __detail::__from_chars_alnum(__first, __last, __val, __base); + __valid = __detail::__from_chars_alnum(__first, __last, __val, __base); if (__builtin_expect(__first == __start, 0)) __res.ec = errc::invalid_argument; diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc b/libstdc++-v3/src/c++17/floating_from_chars.cc index 4aa2483bc28..bbe03f7f068 100644 --- a/libstdc++-v3/src/c++17/floating_from_chars.cc +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc @@ -451,20 +451,6 @@ namespace #endif // USE_STRTOD_FOR_FROM_CHARS #if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 - // If the given ASCII character represents a hexit, return that hexit. - // Otherwise return -1. - int - ascii_to_hexit(char ch) - { - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'a' && ch <= 'f') - return ch - 'a' + 10; - if (ch >= 'A' && ch <= 'F') - return ch - 'A' + 10; - return -1; - } - // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case. bool starts_with_ci(const char* first, const char* last, string_view prefix) @@ -614,8 +600,8 @@ namespace continue; } - int hexit = ascii_to_hexit(ch); - if (hexit == -1) + int hexit = __detail::__from_chars_alnum_to_val(ch); + if (hexit >= 16) break; seen_hexit = true;