libstdc++: Fix handling of field width for wide strings and characters [PR119593]
This patch corrects handling of UTF-32LE and UTF-32BE in __unicode::__literal_encoding_is_unicode<_CharT>, so they are recognized as Unicode encodings and the function produces the correct result for wchar_t. Use `__unicode::__field_width` to compute the estimated width of the character for Unicode wide encodings. PR libstdc++/119593 libstdc++-v3/ChangeLog: * include/bits/unicode.h (__unicode::__literal_encoding_is_unicode<_CharT>): Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix. * include/std/format (__formatter_str::_S_character_width): Define. (__formatter_str::_S_character_width): Updated passed char length. * testsuite/std/format/functions/format.cc: Test for wchar_t. Reviewed-by: Jonathan Wakely <jwakely@redhat.com> Signed-off-by: Tomasz Kamiński <tkaminsk@redhat.com>
This commit is contained in:
parent
70bf0ee440
commit
5c7f6272f4
3 changed files with 23 additions and 3 deletions
|
@ -1039,6 +1039,8 @@ inline namespace __v16_0_0
|
|||
string_view __s(__enc);
|
||||
if (__s.ends_with("//"))
|
||||
__s.remove_suffix(2);
|
||||
if (__s.ends_with("LE") || __s.ends_with("BE"))
|
||||
__s.remove_suffix(2);
|
||||
return __s == "16" || __s == "32";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1277,12 +1277,26 @@ namespace __format
|
|||
_M_spec);
|
||||
}
|
||||
|
||||
[[__gnu__::__always_inline__]]
|
||||
static size_t
|
||||
_S_character_width(_CharT __c)
|
||||
{
|
||||
// N.B. a single byte cannot encode a character of width greater than 1
|
||||
if constexpr (sizeof(_CharT) > 1u &&
|
||||
__unicode::__literal_encoding_is_unicode<_CharT>())
|
||||
return __unicode::__field_width(__c);
|
||||
else
|
||||
return 1u;
|
||||
}
|
||||
|
||||
template<typename _Out>
|
||||
typename basic_format_context<_Out, _CharT>::iterator
|
||||
_M_format_character(_CharT __c,
|
||||
basic_format_context<_Out, _CharT>& __fc) const
|
||||
{
|
||||
return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec);
|
||||
return __format::__write_padded_as_spec({&__c, 1u},
|
||||
_S_character_width(__c),
|
||||
__fc, _M_spec);
|
||||
}
|
||||
|
||||
template<typename _Int>
|
||||
|
|
|
@ -501,9 +501,14 @@ test_unicode()
|
|||
{
|
||||
// Similar to sC example in test_std_examples, but not from the standard.
|
||||
// Verify that the character "🤡" has estimated field width 2,
|
||||
// rather than estimated field width equal to strlen("🤡"), which would be 4.
|
||||
// rather than estimated field width equal to strlen("🤡"), which would be 4,
|
||||
// or just width 1 for single character.
|
||||
std::string sC = std::format("{:*<3}", "🤡");
|
||||
VERIFY( sC == "🤡*" );
|
||||
std::wstring wsC = std::format(L"{:*<3}", L"🤡");
|
||||
VERIFY( wsC == L"🤡*" );
|
||||
wsC = std::format(L"{:*<3}", L'🤡');
|
||||
VERIFY( wsC == L"🤡*" );
|
||||
|
||||
// Verify that "£" has estimated field width 1, not strlen("£") == 2.
|
||||
std::string sL = std::format("{:*<3}", "£");
|
||||
|
@ -517,7 +522,6 @@ test_unicode()
|
|||
std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
|
||||
VERIFY( sP == "£ *" );
|
||||
sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
|
||||
VERIFY( sP == "£* **" );
|
||||
|
||||
// Verify field width handling for extended grapheme clusters,
|
||||
// and that a cluster gets output as a single item, not truncated.
|
||||
|
|
Loading…
Add table
Reference in a new issue