From 93dd7f36f2066ec52137178ee52052f293e5e743 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Thu, 21 Apr 2022 12:11:01 -0400 Subject: [PATCH] libstdc++: Avoid ASCII assumptions in floating_from_chars.cc In starts_with_ci and in __floating_from_chars_hex's inf/nan handling, we were assuming that the letters are contiguous and that 'A' + 32 == 'a' which is true for ASCII but not for other character encodings. This patch fixes starts_with_ci by using a constexpr lookup table that maps uppercase letters to lowercase, and fixes __floating_from_chars_hex by using __from_chars_alnum_to_val. libstdc++-v3/ChangeLog: * include/std/charconv (__from_chars_alnum_to_val_table): Simplify initialization of __lower/__upper_letters. (__from_chars_alnum_to_val): Default the template parameter to false. * src/c++17/floating_from_chars.cc (starts_with_ci): Don't assume the uppercase and lowercase letters are contiguous. (__floating_from_chars_hex): Likewise. --- libstdc++-v3/include/std/charconv | 12 ++----- libstdc++-v3/src/c++17/floating_from_chars.cc | 33 ++++++++++++++----- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/libstdc++-v3/include/std/charconv b/libstdc++-v3/include/std/charconv index 561234cb2fc..218813e4797 100644 --- a/libstdc++-v3/include/std/charconv +++ b/libstdc++-v3/include/std/charconv @@ -412,14 +412,8 @@ namespace __detail constexpr auto __from_chars_alnum_to_val_table() { - constexpr unsigned char __lower_letters[] - = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', - 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', - 'u', 'v', 'w', 'x', 'y', 'z' }; - constexpr unsigned char __upper_letters[] - = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', - 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', - 'U', 'V', 'W', 'X', 'Y', 'Z' }; + constexpr unsigned char __lower_letters[27] = "abcdefghijklmnopqrstuvwxyz"; + constexpr unsigned char __upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; struct { unsigned char __data[1u << __CHAR_BIT__] = {}; } __table; for (auto& __entry : __table.__data) __entry = 127; @@ -437,7 +431,7 @@ namespace __detail // return its corresponding base-10 value, otherwise return a value >= 127. // If _DecOnly is false: if the character is an alphanumeric digit, then // return its corresponding base-36 value, otherwise return a value >= 127. - template + template unsigned char __from_chars_alnum_to_val(unsigned char __c) { diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc b/libstdc++-v3/src/c++17/floating_from_chars.cc index 0f5183aa9b5..13de1e346ab 100644 --- a/libstdc++-v3/src/c++17/floating_from_chars.cc +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc @@ -30,6 +30,7 @@ // Prefer to use std::pmr::string if possible, which requires the cxx11 ABI. #define _GLIBCXX_USE_CXX11_ABI 1 +#include #include #include #include @@ -451,15 +452,33 @@ namespace #if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64 // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case. + // PREFIX is assumed to not contain any uppercase letters. bool starts_with_ci(const char* first, const char* last, string_view prefix) { __glibcxx_requires_valid_range(first, last); - for (char ch : prefix) + // A lookup table that maps uppercase letters to lowercase and + // is otherwise the identity mapping. + static constexpr auto upper_to_lower_table = [] { + constexpr unsigned char lower_letters[27] = "abcdefghijklmnopqrstuvwxyz"; + constexpr unsigned char upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::array table = {}; + for (unsigned i = 0; i < table.size(); ++i) + table[i] = i; + for (unsigned i = 0; i < 26; ++i) + table[upper_letters[i]] = lower_letters[i]; + return table; + }(); + + if (last - first < static_cast(prefix.length())) + return false; + + for (const unsigned char pch : prefix) { - __glibcxx_assert(ch >= 'a' && ch <= 'z'); - if (first == last || (*first != ch && *first != ch - 32)) + // __glibcxx_assert(pch == upper_to_lower_table[pch]); + const unsigned char ch = *first; + if (ch != pch && upper_to_lower_table[ch] != pch) return false; ++first; } @@ -535,10 +554,8 @@ namespace ++first; break; } - else if ((ch >= '0' && ch <= '9') - || (ch >= 'a' && ch <= 'z') - || (ch >= 'A' && ch <= 'Z') - || ch == '_') + else if (ch == '_' + || __detail::__from_chars_alnum_to_val(ch) < 127) continue; else { @@ -599,7 +616,7 @@ namespace continue; } - int hexit = __detail::__from_chars_alnum_to_val(ch); + int hexit = __detail::__from_chars_alnum_to_val(ch); if (hexit >= 16) break; seen_hexit = true;