libstdc++: Avoid ASCII assumptions in floating_from_chars.cc
In starts_with_ci and in __floating_from_chars_hex's inf/nan handling, we were assuming that the letters are contiguous and that 'A' + 32 == 'a', which is true for ASCII but not for other character encodings. This patch fixes starts_with_ci by using a constexpr lookup table that maps uppercase letters to lowercase, and fixes __floating_from_chars_hex by using __from_chars_alnum_to_val.

libstdc++-v3/ChangeLog:

* include/std/charconv (__from_chars_alnum_to_val_table): Simplify initialization of __lower/__upper_letters.
(__from_chars_alnum_to_val): Default the template parameter to false.
* src/c++17/floating_from_chars.cc (starts_with_ci): Don't assume the uppercase and lowercase letters are contiguous.
(__floating_from_chars_hex): Likewise.
This commit is contained in:
parent
605a80bb73
commit
93dd7f36f2
2 changed files with 28 additions and 17 deletions
|
@ -412,14 +412,8 @@ namespace __detail
|
|||
constexpr auto
|
||||
__from_chars_alnum_to_val_table()
|
||||
{
|
||||
constexpr unsigned char __lower_letters[]
|
||||
= { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
|
||||
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
|
||||
'u', 'v', 'w', 'x', 'y', 'z' };
|
||||
constexpr unsigned char __upper_letters[]
|
||||
= { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
|
||||
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
|
||||
'U', 'V', 'W', 'X', 'Y', 'Z' };
|
||||
constexpr unsigned char __lower_letters[27] = "abcdefghijklmnopqrstuvwxyz";
|
||||
constexpr unsigned char __upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
struct { unsigned char __data[1u << __CHAR_BIT__] = {}; } __table;
|
||||
for (auto& __entry : __table.__data)
|
||||
__entry = 127;
|
||||
|
@ -437,7 +431,7 @@ namespace __detail
|
|||
// return its corresponding base-10 value, otherwise return a value >= 127.
|
||||
// If _DecOnly is false: if the character is an alphanumeric digit, then
|
||||
// return its corresponding base-36 value, otherwise return a value >= 127.
|
||||
template<bool _DecOnly>
|
||||
template<bool _DecOnly = false>
|
||||
unsigned char
|
||||
__from_chars_alnum_to_val(unsigned char __c)
|
||||
{
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
// Prefer to use std::pmr::string if possible, which requires the cxx11 ABI.
|
||||
#define _GLIBCXX_USE_CXX11_ABI 1
|
||||
|
||||
#include <array>
|
||||
#include <charconv>
|
||||
#include <bit>
|
||||
#include <string>
|
||||
|
@ -451,15 +452,33 @@ namespace
|
|||
|
||||
#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
|
||||
// Return true iff [FIRST,LAST) begins with PREFIX, ignoring case.
|
||||
// PREFIX is assumed to not contain any uppercase letters.
|
||||
bool
|
||||
starts_with_ci(const char* first, const char* last, string_view prefix)
|
||||
{
|
||||
__glibcxx_requires_valid_range(first, last);
|
||||
|
||||
for (char ch : prefix)
|
||||
// A lookup table that maps uppercase letters to lowercase and
|
||||
// is otherwise the identity mapping.
|
||||
static constexpr auto upper_to_lower_table = [] {
|
||||
constexpr unsigned char lower_letters[27] = "abcdefghijklmnopqrstuvwxyz";
|
||||
constexpr unsigned char upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
std::array<unsigned char, (1u << __CHAR_BIT__)> table = {};
|
||||
for (unsigned i = 0; i < table.size(); ++i)
|
||||
table[i] = i;
|
||||
for (unsigned i = 0; i < 26; ++i)
|
||||
table[upper_letters[i]] = lower_letters[i];
|
||||
return table;
|
||||
}();
|
||||
|
||||
if (last - first < static_cast<ptrdiff_t>(prefix.length()))
|
||||
return false;
|
||||
|
||||
for (const unsigned char pch : prefix)
|
||||
{
|
||||
__glibcxx_assert(ch >= 'a' && ch <= 'z');
|
||||
if (first == last || (*first != ch && *first != ch - 32))
|
||||
// __glibcxx_assert(pch == upper_to_lower_table[pch]);
|
||||
const unsigned char ch = *first;
|
||||
if (ch != pch && upper_to_lower_table[ch] != pch)
|
||||
return false;
|
||||
++first;
|
||||
}
|
||||
|
@ -535,10 +554,8 @@ namespace
|
|||
++first;
|
||||
break;
|
||||
}
|
||||
else if ((ch >= '0' && ch <= '9')
|
||||
|| (ch >= 'a' && ch <= 'z')
|
||||
|| (ch >= 'A' && ch <= 'Z')
|
||||
|| ch == '_')
|
||||
else if (ch == '_'
|
||||
|| __detail::__from_chars_alnum_to_val(ch) < 127)
|
||||
continue;
|
||||
else
|
||||
{
|
||||
|
@ -599,7 +616,7 @@ namespace
|
|||
continue;
|
||||
}
|
||||
|
||||
int hexit = __detail::__from_chars_alnum_to_val<false>(ch);
|
||||
int hexit = __detail::__from_chars_alnum_to_val(ch);
|
||||
if (hexit >= 16)
|
||||
break;
|
||||
seen_hexit = true;
|
||||
|
|
Loading…
Add table
Reference in a new issue