libstdc++: Fix handling of field width for wide strings and characters [PR119593]

This patch corrects handling of UTF-32LE and UTF-32BE in
__unicode::__literal_encoding_is_unicode<_CharT>, so they are
recognized as Unicode and functions produce correct results for wchar_t.

Use `__unicode::__field_width` to compute the estimated width
of the character for Unicode wide encodings.

	PR libstdc++/119593

libstdc++-v3/ChangeLog:

	* include/bits/unicode.h
	(__unicode::__literal_encoding_is_unicode<_CharT>):
	Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix.
	* include/std/format (__formatter_str::_S_character_width):
	Define.
	(__formatter_str::_M_format_character): Updated passed char
	length.
	* testsuite/std/format/functions/format.cc: Test for wchar_t.

Reviewed-by: Jonathan Wakely <jwakely@redhat.com>
Signed-off-by: Tomasz Kamiński <tkaminsk@redhat.com>
This commit is contained in:
Tomasz Kamiński 2025-04-03 10:23:45 +02:00
parent 70bf0ee440
commit 5c7f6272f4
3 changed files with 23 additions and 3 deletions

View file

@ -1039,6 +1039,8 @@ inline namespace __v16_0_0
string_view __s(__enc);
if (__s.ends_with("//"))
__s.remove_suffix(2);
if (__s.ends_with("LE") || __s.ends_with("BE"))
__s.remove_suffix(2);
return __s == "16" || __s == "32";
}
}

View file

@ -1277,12 +1277,26 @@ namespace __format
_M_spec);
}
// Estimated field width used when padding the single character __c.
// If _CharT is wider than one byte and
// __unicode::__literal_encoding_is_unicode<_CharT>() holds, the width is
// whatever __unicode::__field_width(__c) reports; in every other case
// the width is 1.  The choice is made at compile time (if constexpr).
[[__gnu__::__always_inline__]]
static size_t
_S_character_width(_CharT __c)
{
// N.B. a single byte cannot encode a character of width greater than 1
if constexpr (sizeof(_CharT) > 1u &&
__unicode::__literal_encoding_is_unicode<_CharT>())
return __unicode::__field_width(__c);
else
return 1u;
}
// Write the single character __c to the format context __fc, padded
// according to _M_spec, and return the resulting output iterator.
// The character is passed as a one-element view ({&__c, 1u}); the second
// argument to __write_padded_as_spec is the width attributed to it.
template<typename _Out>
typename basic_format_context<_Out, _CharT>::iterator
_M_format_character(_CharT __c,
basic_format_context<_Out, _CharT>& __fc) const
{
// NOTE(review): two return statements appear back to back in this
// listing.  The first (fixed width 1) looks like the pre-patch line and
// the second (width from _S_character_width) like its replacement, with
// the diff markers lost in rendering -- confirm against the real file.
return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec);
return __format::__write_padded_as_spec({&__c, 1u},
_S_character_width(__c),
__fc, _M_spec);
}
template<typename _Int>

View file

@ -501,9 +501,14 @@ test_unicode()
{
// Similar to sC example in test_std_examples, but not from the standard.
// Verify that the character "🤡" has estimated field width 2,
// rather than estimated field width equal to strlen("🤡"), which would be 4.
// rather than estimated field width equal to strlen("🤡"), which would be 4,
// or just width 1 for single character.
std::string sC = std::format("{:*<3}", "🤡");
VERIFY( sC == "🤡*" );
std::wstring wsC = std::format(L"{:*<3}", L"🤡");
VERIFY( wsC == L"🤡*" );
wsC = std::format(L"{:*<3}", L'🤡');
VERIFY( wsC == L"🤡*" );
// Verify that "£" has estimated field width 1, not strlen("£") == 2.
std::string sL = std::format("{:*<3}", "£");
@ -517,7 +522,6 @@ test_unicode()
std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
VERIFY( sP == "£ *" );
sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
VERIFY( sP == "£* **" );
// Verify field width handling for extended grapheme clusters,
// and that a cluster gets output as a single item, not truncated.