libstdc++: Fix handling of field width for wide strings and characters [PR119593]

This patch corrects handling of UTF-32LE and UTF32-BE in __unicode::__literal_encoding_is_unicode<_CharT>, so they are recognized as unicode and functions produces correct result for wchar_t. Use `__unicode::__field_width` to compute the estimated witdh of the charcter for unicode wide encoding. PR libstdc++/119593 libstdc++-v3/ChangeLog: * include/bits/unicode.h (__unicode::__literal_encoding_is_unicode<_CharT>): Corrected handing for UTF-16 and UTF-32 with "LE" or "BE" suffix. * include/std/format (__formatter_str::_S_character_width): Define. (__formatter_str::_S_character_width): Updated passed char length. * testsuite/std/format/functions/format.cc: Test for wchar_t. Reviewed-by: Jonathan Wakely <jwakely@redhat.com> Signed-off-by: Tomasz Kamiński <tkaminsk@redhat.com>
2025-04-03 10:23:45 +02:00 · 2025-04-03 10:23:45 +02:00 · 5c7f6272f4
commit 5c7f6272f4
parent 70bf0ee440
3 changed files with 23 additions and 3 deletions
--- a/libstdc++-v3/include/bits/unicode.h
+++ b/libstdc++-v3/include/bits/unicode.h
@ -1039,6 +1039,8 @@ inline namespace __v16_0_0
 	      string_view __s(__enc);
 	      if (__s.ends_with("//"))
 		__s.remove_suffix(2);
+	      if (__s.ends_with("LE") || __s.ends_with("BE"))
+		__s.remove_suffix(2);
 	      return __s == "16" || __s == "32";
 	    }
 	}
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@ -1277,12 +1277,26 @@ namespace __format
 						  _M_spec);
 	}

+      [[__gnu__::__always_inline__]]
+      static size_t
+      _S_character_width(_CharT __c)
+      {
+	// N.B. single byte cannot encode charcter of width greater than 1
+	if constexpr (sizeof(_CharT) > 1u && 
+			__unicode::__literal_encoding_is_unicode<_CharT>())
+	  return __unicode::__field_width(__c);
+	else
+	  return 1u;
+      }
+
      template<typename _Out>
 	typename basic_format_context<_Out, _CharT>::iterator
 	_M_format_character(_CharT __c,
 		      basic_format_context<_Out, _CharT>& __fc) const
 	{
-	  return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec);
+	  return __format::__write_padded_as_spec({&__c, 1u},
+						  _S_character_width(__c),
+			                          __fc, _M_spec);
 	}

      template<typename _Int>
--- a/libstdc++-v3/testsuite/std/format/functions/format.cc
+++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
@ -501,9 +501,14 @@ test_unicode()
 {
  // Similar to sC example in test_std_examples, but not from the standard.
  // Verify that the character "🤡" has estimated field width 2,
-  // rather than estimated field width equal to strlen("🤡"), which would be 4.
+  // rather than estimated field width equal to strlen("🤡"), which would be 4,
+  // or just width 1 for single character.
  std::string sC = std::format("{:*<3}", "🤡");
  VERIFY( sC == "🤡*" );
+  std::wstring wsC = std::format(L"{:*<3}", L"🤡");
+  VERIFY( wsC == L"🤡*" );
+  wsC = std::format(L"{:*<3}", L'🤡');
+  VERIFY( wsC == L"🤡*" );

  // Verify that "£" has estimated field width 1, not strlen("£") == 2.
  std::string sL = std::format("{:*<3}", "£");
@ -517,7 +522,6 @@ test_unicode()
  std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
  VERIFY( sP == "£ *" );
  sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
-  VERIFY( sP == "£* **" );

  // Verify field width handling for extended grapheme clusters,
  // and that a cluster gets output as a single item, not truncated.