libstdc++: Avoid ASCII assumptions in floating_from_chars.cc

In starts_with_ci and in __floating_from_chars_hex's inf/nan handling,
we were assuming that the letters are contiguous and that 'A' + 32 == 'a',
both of which hold for ASCII but not necessarily for other character encodings.

This patch fixes starts_with_ci by using a constexpr lookup table that
maps uppercase letters to lowercase, and fixes __floating_from_chars_hex
by using __from_chars_alnum_to_val.

libstdc++-v3/ChangeLog:

	* include/std/charconv (__from_chars_alnum_to_val_table):
	Simplify initialization of __lower/__upper_letters.
	(__from_chars_alnum_to_val): Default the template parameter to
	false.
	* src/c++17/floating_from_chars.cc (starts_with_ci): Don't
	assume the uppercase and lowercase letters are contiguous.
	(__floating_from_chars_hex): Likewise.
This commit is contained in:
Patrick Palka 2022-04-21 12:11:01 -04:00
parent 605a80bb73
commit 93dd7f36f2
2 changed files with 28 additions and 17 deletions

View file

@ -412,14 +412,8 @@ namespace __detail
constexpr auto
__from_chars_alnum_to_val_table()
{
constexpr unsigned char __lower_letters[]
= { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
'u', 'v', 'w', 'x', 'y', 'z' };
constexpr unsigned char __upper_letters[]
= { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z' };
constexpr unsigned char __lower_letters[27] = "abcdefghijklmnopqrstuvwxyz";
constexpr unsigned char __upper_letters[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
struct { unsigned char __data[1u << __CHAR_BIT__] = {}; } __table;
for (auto& __entry : __table.__data)
__entry = 127;
@ -437,7 +431,7 @@ namespace __detail
// return its corresponding base-10 value, otherwise return a value >= 127.
// If _DecOnly is false: if the character is an alphanumeric digit, then
// return its corresponding base-36 value, otherwise return a value >= 127.
template<bool _DecOnly>
template<bool _DecOnly = false>
unsigned char
__from_chars_alnum_to_val(unsigned char __c)
{

View file

@ -30,6 +30,7 @@
// Prefer to use std::pmr::string if possible, which requires the cxx11 ABI.
#define _GLIBCXX_USE_CXX11_ABI 1
#include <array>
#include <charconv>
#include <bit>
#include <string>
@ -451,15 +452,33 @@ namespace
#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
// Return true iff [FIRST,LAST) begins with PREFIX, ignoring case.
// PREFIX is assumed to not contain any uppercase letters.
bool
starts_with_ci(const char* first, const char* last, string_view prefix)
{
__glibcxx_requires_valid_range(first, last);
for (char ch : prefix)
// Case-folding table indexed by character value: every uppercase letter
// is mapped to its lowercase counterpart, and every other value is mapped
// to itself.  The letters are taken from literal alphabets, so nothing is
// assumed about how the character encoding lays them out.
static constexpr auto upper_to_lower_table = [] {
  constexpr unsigned char lowercase[27] = "abcdefghijklmnopqrstuvwxyz";
  constexpr unsigned char uppercase[27] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  // The arrays hold 26 letters plus the terminating NUL of the literal.
  constexpr unsigned letter_count = sizeof(uppercase) - 1;
  std::array<unsigned char, (1u << __CHAR_BIT__)> folded = {};
  // Start from the identity mapping over every possible character value.
  unsigned code = 0;
  for (auto& slot : folded)
    slot = code++;
  // Then redirect each uppercase letter to the matching lowercase letter.
  for (unsigned k = 0; k != letter_count; ++k)
    folded[uppercase[k]] = lowercase[k];
  return folded;
}();
if (last - first < static_cast<ptrdiff_t>(prefix.length()))
return false;
for (const unsigned char pch : prefix)
{
__glibcxx_assert(ch >= 'a' && ch <= 'z');
if (first == last || (*first != ch && *first != ch - 32))
// __glibcxx_assert(pch == upper_to_lower_table[pch]);
const unsigned char ch = *first;
if (ch != pch && upper_to_lower_table[ch] != pch)
return false;
++first;
}
@ -535,10 +554,8 @@ namespace
++first;
break;
}
else if ((ch >= '0' && ch <= '9')
|| (ch >= 'a' && ch <= 'z')
|| (ch >= 'A' && ch <= 'Z')
|| ch == '_')
else if (ch == '_'
|| __detail::__from_chars_alnum_to_val(ch) < 127)
continue;
else
{
@ -599,7 +616,7 @@ namespace
continue;
}
int hexit = __detail::__from_chars_alnum_to_val<false>(ch);
int hexit = __detail::__from_chars_alnum_to_val(ch);
if (hexit >= 16)
break;
seen_hexit = true;