libstdc++: Add floating-point std::to_chars implementation

This implements the floating-point std::to_chars overloads for float,
double and long double.  We use the Ryu library to compute the shortest
round-trippable fixed and scientific forms for float, double and long
double.  We also use Ryu for performing explicit-precision fixed and
scientific formatting for float and double. For explicit-precision
formatting for long double we fall back to using printf.  Hexadecimal
formatting for float, double and long double is implemented from
scratch.

The supported long double binary formats are binary64, binary80 (x86
80-bit extended precision), binary128 and ibm128.

Much of the complexity of the implementation is in computing the exact
output length before handing it off to Ryu (which doesn't do bounds
checking).  In some cases it's hard to compute the output length
beforehand, so in these cases we instead compute an upper bound on the
output length and use a sufficiently-sized intermediate buffer only if
necessary.

Another source of complexity is in the general-with-precision formatting
mode, where we need to do zero-trimming of the string returned by Ryu,
and where we also take care to avoid having to format the number through
Ryu a second time when the general formatting mode resolves to fixed
(which we determine by doing a scientific formatting first and
inspecting the scientific exponent).  We avoid going through Ryu twice
by instead transforming the scientific form to the corresponding fixed
form via in-place string manipulation.

This implementation is non-conforming in a few ways:

1. For the shortest hexadecimal formatting, we currently follow the
   Microsoft implementation's decision to be consistent with the
   output of printf's '%a' specifier at the expense of sometimes not
   printing the shortest representation.  For example, the shortest hex
   form for the number 1.08p+0 is 2.1p-1, but we output the former
   instead of the latter, as does printf.

2. The Ryu routine generic_binary_to_decimal that we use for performing
   shortest formatting for large floating point types is implemented
   using the __int128 type, but some targets with a large long double
   type lack __int128 (e.g. i686), so we can't perform shortest
   formatting of long double on such targets through Ryu.  As a
   temporary stopgap this patch makes the long double to_chars overloads
   just dispatch to the double overloads on these targets, which means
   we lose precision in the output.  (We could potentially fix this by
   writing a specialized version of Ryu's generic_binary_to_decimal
   routine that uses uint64_t instead of __int128.)  [Though I wonder if
   there's a better way to work around the lack of __int128 on i686
   specifically?]

3. Our shortest formatting for __ibm128 doesn't guarantee the round-trip
   property if the difference between the high- and low-order exponent
   is large.  This is because we treat __ibm128 as if it has a
   contiguous 105-bit mantissa by merging the mantissas of the high-
   and low-order parts (using code extracted from glibc), so we
   potentially lose precision from the low-order part.  This seems to be
   consistent with how glibc printf formats __ibm128.

libstdc++-v3/ChangeLog:

	* config/abi/pre/gnu.ver: Add new exports.
	* include/std/charconv (to_chars): Declare the floating-point
	overloads for float, double and long double.
	* src/c++17/Makefile.am (sources): Add floating_to_chars.cc.
	* src/c++17/Makefile.in: Regenerate.
	* src/c++17/floating_to_chars.cc: New file.
	(to_chars): Define for float, double and long double.
	* testsuite/20_util/to_chars/long_double.cc: New test.
This commit is contained in:
Patrick Palka 2020-12-17 23:11:34 -05:00
parent 5033506993
commit 3c57e69235
6 changed files with 1796 additions and 1 deletions

View file

@ -2393,6 +2393,13 @@ GLIBCXX_3.4.29 {
# std::once_flag::_M_finish(bool)
_ZNSt9once_flag9_M_finishEb;
# std::to_chars(char*, char*, [float|double|long double])
_ZSt8to_charsPcS_[defg];
# std::to_chars(char*, char*, [float|double|long double], chars_format)
_ZSt8to_charsPcS_[defg]St12chars_format;
# std::to_chars(char*, char*, [float|double|long double], chars_format, int)
_ZSt8to_charsPcS_[defg]St12chars_formati;
} GLIBCXX_3.4.28;
# Symbols in the support library (libsupc++) have their own tag.

View file

@ -702,6 +702,30 @@ namespace __detail
chars_format __fmt = chars_format::general) noexcept;
#endif
// Floating-point std::to_chars
//
// Every overload formats __value into the character range
// [__first, __last) and returns a to_chars_result.  There are three forms
// per floating-point type:
//   (value)                  - no format given.
//                              NOTE(review): presumably the shortest
//                              round-trippable representation; the format
//                              selection is not visible in this header.
//   (value, __fmt)           - shortest representation in the requested
//                              chars_format (fixed, scientific, general or
//                              hex).
//   (value, __fmt, __precision)
//                            - explicit-precision formatting in the
//                              requested chars_format.
// NOTE(review): the accompanying long double test expects the result's ec
// to be errc::value_too_large when the range is too small, and its ptr to
// point just past the last character written on success -- confirm against
// the definitions in src/c++17/floating_to_chars.cc.

// Overloads for float.
to_chars_result to_chars(char* __first, char* __last, float __value) noexcept;
to_chars_result to_chars(char* __first, char* __last, float __value,
chars_format __fmt) noexcept;
to_chars_result to_chars(char* __first, char* __last, float __value,
chars_format __fmt, int __precision) noexcept;
// Overloads for double.
to_chars_result to_chars(char* __first, char* __last, double __value) noexcept;
to_chars_result to_chars(char* __first, char* __last, double __value,
chars_format __fmt) noexcept;
to_chars_result to_chars(char* __first, char* __last, double __value,
chars_format __fmt, int __precision) noexcept;
// Overloads for long double.
to_chars_result to_chars(char* __first, char* __last, long double __value)
noexcept;
to_chars_result to_chars(char* __first, char* __last, long double __value,
chars_format __fmt) noexcept;
to_chars_result to_chars(char* __first, char* __last, long double __value,
chars_format __fmt, int __precision) noexcept;
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std
#endif // C++14

View file

@ -51,6 +51,7 @@ endif
sources = \
floating_from_chars.cc \
floating_to_chars.cc \
fs_dir.cc \
fs_ops.cc \
fs_path.cc \

View file

@ -124,7 +124,7 @@ LTLIBRARIES = $(noinst_LTLIBRARIES)
libc__17convenience_la_LIBADD =
@ENABLE_DUAL_ABI_TRUE@am__objects_1 = cow-fs_dir.lo cow-fs_ops.lo \
@ENABLE_DUAL_ABI_TRUE@ cow-fs_path.lo
am__objects_2 = floating_from_chars.lo fs_dir.lo fs_ops.lo fs_path.lo \
am__objects_2 = floating_from_chars.lo floating_to_chars.lo fs_dir.lo fs_ops.lo fs_path.lo \
memory_resource.lo $(am__objects_1)
@ENABLE_DUAL_ABI_TRUE@am__objects_3 = cow-string-inst.lo
@ENABLE_EXTERN_TEMPLATE_TRUE@am__objects_4 = ostream-inst.lo \
@ -440,6 +440,7 @@ headers =
sources = \
floating_from_chars.cc \
floating_to_chars.cc \
fs_dir.cc \
fs_ops.cc \
fs_path.cc \

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,199 @@
// Copyright (C) 2020 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// <charconv> is supported in C++14 as a GNU extension, but this test uses C++17
// hexadecimal floating-point literals.
// { dg-do run { target c++17 } }
// { dg-xfail-run-if "Ryu needs __int128" { large_long_double && { ! int128 } } }
#include <charconv>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <limits>
#include <vector>
#include <testsuite_hooks.h>
using namespace std;
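// Only the explicit-precision fixed, scientific and general long double
// overloads of std::to_chars currently go through printf; the shortest forms
// are computed with Ryu and the hexadecimal formatting is hand-written.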
// Test our hand-written hexadecimal formatting implementation.
//
// Every test value is scaled by a range of binary exponents and then checked
// three ways:
//   1. its shortest hex form must equal printf's "%La" output (without the
//      leading "0x", which to_chars does not emit);
//   2. the shortest hex forms of both neighbouring values must also match
//      printf and must differ from the form of the value itself;
//   3. for every precision in [-1, 50) the explicit-precision hex form must
//      equal printf's "%.*La" output.
void
test01()
{
  const long double hex_testcases[]
    = { nextdownl(numeric_limits<long double>::max()),
	nextupl(numeric_limits<long double>::min()),
	42.0L,
	0x1.2p+0L,
	0x1.23p+0L,
	0x1.234p+0L,
	0x1.2345p+0L,
	0x1.23456p+0L,
	0x1.234567p+0L,
	0x1.2345678p+0L,
	0x1.23456789p+0L,
	0x1.23456789ap+0L,
	0x1.23456789abp+0L,
	0x1.23456789abcp+0L,
	0x1.23456789abcdp+0L,
	0x1.23456789abcdep+0L,
	0x1.23456789abcdefp+0L,
	0x1.23456789abcdef0p+0L,
	0x1.23456789abcdef01p+0L,
	0x1.23456789abcdef012p+0L,
	0x1.23456789abcdef0123p+0L,
	0x1.23456789abcdef01234p+0L,
	0x1.23456789abcdef012345p+0L,
	0x1.23456789abcdef0123456p+0L,
	0x1.23456789abcdef01234567p+0L,
	0x1.23456789abcdef012345678p+0L,
	0x1.23456789abcdef0123456789p+0L,
	0x1.23456789abcdef0123456789ap+0L,
	0x1.23456789abcdef0123456789abp+0L,
	0x1.23456789abcdef0123456789abcp+0L,
	0x1.23456789abcdef0123456789abcdp+0L,
    };

  char to_chars_buffer[1024], printf_buffer[1024], original_form[1024];

  // Formats VALUE in shortest hex form into to_chars_buffer (NUL-terminated)
  // and verifies that the result agrees with printf's "%La".  One byte of
  // the buffer is held back from to_chars so the terminator always fits.
  const auto check_shortest_hex = [&] (long double value)
    {
      const auto result = to_chars(begin(to_chars_buffer),
				   end(to_chars_buffer) - 1,
				   value, chars_format::hex);
      VERIFY( result.ec == errc{} );
      *result.ptr = '\0';

      const int printf_length
	= snprintf(printf_buffer, sizeof(printf_buffer), "%La", value);
      VERIFY( printf_length > 0
	      && static_cast<size_t>(printf_length) < sizeof(printf_buffer) );
      VERIFY( !strcmp(to_chars_buffer, printf_buffer+strlen("0x")) );
    };

  for (int exponent : {-11000, -3000, -300, -50, -7, 0, 7, 50, 300, 3000, 11000})
    for (long double testcase : hex_testcases)
      {
	testcase = ldexpl(testcase, exponent);
	// Scaling may underflow to zero or overflow to infinity, depending
	// on the long double format; such values are not interesting here.
	if (testcase == 0.0L || isinf(testcase))
	  continue;

	check_shortest_hex(testcase);

	// Keep the form of the value itself so that BOTH neighbours are
	// compared against it (and not against each other's output).
	// Both buffers have the same size and the source is NUL-terminated.
	strcpy(original_form, to_chars_buffer);

	// Verify that the nearby values have a different shortest form.
	for (const long double neighbour
	       : {nextdownl(testcase), nextupl(testcase)})
	  {
	    check_shortest_hex(neighbour);
	    VERIFY( strcmp(to_chars_buffer, original_form) != 0 );
	  }

	// Explicit-precision hex formatting; a negative precision makes
	// printf behave as if no precision had been given.
	for (int precision = -1; precision < 50; precision++)
	  {
	    const auto result = to_chars(begin(to_chars_buffer),
					 end(to_chars_buffer) - 1,
					 testcase, chars_format::hex,
					 precision);
	    VERIFY( result.ec == errc{} );
	    *result.ptr = '\0';

	    const int printf_length
	      = snprintf(printf_buffer, sizeof(printf_buffer), "%.*La",
			 precision, testcase);
	    VERIFY( printf_length > 0
		    && static_cast<size_t>(printf_length)
			 < sizeof(printf_buffer) );
	    VERIFY( !strcmp(to_chars_buffer, printf_buffer+strlen("0x")) );
	  }
      }
}
// Test the fixed, scientific and general formatting modes.
//
// For a geometric sequence of magnitudes (and their reciprocals), up to the
// point where the magnitude overflows to infinity:
//   - the explicit-precision output must be byte-for-byte identical to
//     printf's "%.*Lf" / "%.*Le" / "%.*Lg" output, must fit exactly into a
//     buffer of that length, and must fail with errc::value_too_large when
//     the buffer is one character shorter;
//   - the shortest form must differ from the shortest forms of both
//     neighbouring values.
void
test02()
{
  const long double growth_factor = 1.442695040888963407359924681001892137L;
  for (chars_format fmt : {chars_format::fixed, chars_format::scientific,
			   chars_format::general})
    {
      // The printf conversion corresponding to FMT; it only depends on FMT,
      // so compute it once per format.
      const char* const printf_specifier
	= (fmt == chars_format::fixed ? "%.*Lf"
	   : fmt == chars_format::scientific ? "%.*Le"
	   : fmt == chars_format::general ? "%.*Lg"
	   : nullptr);

      // Grow the magnitude by GROWTH_FACTOR for the first 100 steps, then by
      // repeated squaring so that infinity is reached quickly.
      int count = 0;
      for (long double magnitude = 1.0L; !isinf(magnitude);
	   magnitude *= (++count <= 100 ? growth_factor : magnitude))
	for (const long double value : {magnitude, 1.0L/magnitude})
	  {
	    for (const int precision : {-1, 0, 10, 100, 10000})
	      {
		// Ask printf how long the output is, then produce it.
		const int printf_length
		  = snprintf(nullptr, 0, printf_specifier, precision, value);
		VERIFY( printf_length > 0 );
		const size_t output_length
		  = static_cast<size_t>(printf_length);

		std::vector<char> printf_buffer(output_length+1);
		snprintf(printf_buffer.data(), printf_buffer.size(),
			 printf_specifier, precision, value);

		// to_chars gets a buffer of exactly the required size (it
		// does not write a terminating NUL).
		std::vector<char> to_chars_buffer(output_length);
		char* const first = to_chars_buffer.data();
		auto result = to_chars(first, first+output_length,
				       value, fmt, precision);
		VERIFY( result.ec == errc{} );
		VERIFY( result.ptr == first+output_length );
		VERIFY( !memcmp(printf_buffer.data(), first, output_length) );

		// One character less must be reported as too small.
		result = to_chars(first, first+output_length-1,
				  value, fmt, precision);
		VERIFY( result.ec == errc::value_too_large );
	      }

	    // Verify that the nearby values have a different shortest form.
	    // One byte of each buffer is held back for the terminating NUL.
	    char to_chars_buffer[50000];
	    auto result = to_chars(begin(to_chars_buffer),
				   end(to_chars_buffer) - 1,
				   value, fmt);
	    VERIFY( result.ec == errc{} );
	    *result.ptr = '\0';

	    char nearby_buffer[50000];
	    for (const long double neighbour
		   : {nextdownl(value), nextupl(value)})
	      {
		result = to_chars(begin(nearby_buffer),
				  end(nearby_buffer) - 1,
				  neighbour, fmt);
		VERIFY( result.ec == errc{} );
		*result.ptr = '\0';
		VERIFY( strcmp(to_chars_buffer, nearby_buffer) != 0 );
	      }
	  }
    }
}
// Test driver: runs the hexadecimal checks first, then the checks of the
// fixed, scientific and general modes.  A failed VERIFY stops the run.
int
main()
{
  test01();
  test02();
  return 0;
}