libstdc++: Implement debug format for strings and characters formatters [PR109162]

This patch implements the part of P2286R8 that specifies the debug (escaped)
format for strings and characters. This includes handling of both the '?'
format specifier and the set_debug_format member.

To indicate partial support we define the __glibcxx_format_ranges macro
with value 1, without defining __cpp_lib_format_ranges.

We provide two separate escaping routines depending on the literal
encoding for the corresponding character types. If the character
encoding is Unicode, we follow the specification from the standard
(__format::__write_escaped_unicode).
For other encodings, we escape only characters in range [0x00, 0x80),
interpreting them as ASCII values: [0x00, 0x20), 0x7f and '\t', '\r',
'\n', '\\', '"', '\'' are escaped. We assume every character outside
this range is printable (__format::__write_escaped_ascii).
In particular we do not yet implement special handling of shift
sequences.

For Unicode escaping a new __unicode::__escape_edges table is introduced,
that encodes whether a character belongs to a General_Category that is
escaped by the standard (Separator or Other). This table is generated from
DerivedGeneralCategory.txt provided by Unicode. Only a boolean flag is
preserved to reduce the number of entries. The additional rules for escaping
are handled by __format::__should_escape_unicode.

When width or precision is specified, we emit the escaped string to a temporary
buffer and format the resulting string according to the format spec.
For characters we use a fixed size stack buffer, for which a new _Fixedbuf_sink is
introduced. For strings, we use _Str_sink and, to avoid allocations,
we compute the estimated size of the (possibly truncated) input, and if it is
larger than the width field we print directly.

	PR libstdc++/109162

contrib/ChangeLog:

	* unicode/README: Mention DerivedGeneralCategory.txt.
	* unicode/gen_libstdcxx_unicode_data.py: Generate __escape_edges
	table from DerivedGeneralCategory.txt. Update file name in comments.
	* unicode/DerivedGeneralCategory.txt: Copy of file distributed by
	Unicode Consortium.

libstdc++-v3/ChangeLog:

	* include/bits/chrono_io.h (__detail::_Widen): Moved to std/format file.
	* include/bits/unicode-data.h: Regenerate.
	* include/bits/unicode.h (__unicode::_Utf_iterator::_M_units)
	(__unicode::__should_escape_category): Define.
	* include/std/format (_GLIBCXX_WIDEN_, _GLIBCXX_WIDEN):	Copied from
	include/bits/chrono_io.h.
	(__format::_Widen): Moved from include/bits/chrono_io.h.
	(__format::_Term_char, __format::_Escapes, __format::_Separators)
	(__format::__should_escape_ascii, __format::__should_escape_unicode)
	(__format::__write_escape_seq, __format::__write_escaped_char)
	(__format::__write_escaped_ascii, __format::__write_escaped_unicode)
	(__format::__write_escaped): Define.
	(__formatter_str::_S_trunc): Extracted truncation of character
	sequences.
	(__formatter_str::format): Handle _Pres_esc.
	(__formatter_int::_M_do_parse) [__glibcxx_format_ranges]: Parse '?'.
	(__formatter_int::_M_format_character_escaped): Define.
	(formatter<_CharT, _CharT>::format, formatter<char, wchar_t>::format):
	Handle _Pres_esc.
	(__formatter_str::set_debug_format, formatter<...>::set_debug_format):
	Guard with __glibcxx_format_ranges.
	(__format::_Fixedbuf_sink): Define.
	* testsuite/23_containers/vector/bool/format.cc: Use __format::_Widen
	and remove unnecessary <chrono> include.
	* testsuite/std/format/debug.cc: New test.
	* testsuite/std/format/debug_nonunicode.cc: New test.
	* testsuite/std/format/parse_ctx.cc (escaped_strings_supported): Define
	to true if __glibcxx_format_ranges is defined.
	* testsuite/std/format/string.cc (escaped_strings_supported): Define to
	true if __glibcxx_format_ranges is defined.

Reviewed-by: Jonathan Wakely <jwakely@redhat.com>
Signed-off-by: Tomasz Kamiński <tkaminsk@redhat.com>
This commit is contained in:
Tomasz Kamiński 2025-04-02 14:19:26 +02:00
parent b57d7ef4bd
commit 3b33d792cf
12 changed files with 5538 additions and 86 deletions

File diff suppressed because it is too large Load diff

View file

@ -16,10 +16,11 @@ ftp://ftp.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt
ftp://ftp.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
ftp://ftp.unicode.org/Public/UNIDATA/NameAliases.txt
Two additional files are needed for lookup tables in libstdc++:
Three additional files are needed for lookup tables in libstdc++:
ftp://ftp.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakProperty.txt
ftp://ftp.unicode.org/Public/UNIDATA/emoji/emoji-data.txt
ftp://ftp.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt
All these files have been added to source control in this directory;
please see unicode-license.txt for the relevant copyright information.

View file

@ -126,7 +126,7 @@ edges = find_edges(all_code_points, 1)
# Table for std::__unicode::__format_width(char32_t)
print(" // Table generated by contrib/unicode/gen_std_format_width.py,")
print(" // Table generated by contrib/unicode/gen_libstdcxx_unicode_data.py,")
print(" // from EastAsianWidth.txt from the Unicode standard.");
print(" inline constexpr char32_t __width_edges[] = {", end="")
for i, e in enumerate(edges):
@ -138,6 +138,45 @@ for i, e in enumerate(edges):
print("{:#x},".format(c), end="")
print("\n };\n")
# By default escape each code point
all_code_points = [True] * (1 + 0x10FFFF)
escaped_general_categories = {
# Separator (Z)
"Zs", "Zl", "Zp",
# Other (C)
"Cc", "Cf", "Cs", "Co", "Cn",
}
# Extract General_Category and determine if it should be escaped
# for all code points.
for line in open("DerivedGeneralCategory.txt", "r"):
# Example lines:
# 0530 ; Cn # <reserved-0530>
# 0557..0558 ; Cn # [2] <reserved-0557>..<reserved-0558>
line = line.split("#")[0]
if re.match(r'^[\dA-Fa-f][^;]+;', line):
code_points, general_category = line.split(";")
gc_escaped = general_category.strip() in escaped_general_categories
process_code_points(code_points, gc_escaped)
edges = find_edges(all_code_points)
shift_bits = 1
print(" // Values generated by contrib/unicode/gen_libstdcxx_unicode_data.py,")
print(" // from DerivedGeneralCategory.txt from the Unicode standard.");
print(" // Entries are (code_point << 1) + escape.")
print(" inline constexpr uint32_t __escape_edges[] = {", end="")
for i, e in enumerate(edges):
if i % 6:
print(" ", end="")
else:
print("\n ", end="")
c, p = e
x = (c << shift_bits) + (1 if p else 0)
print("{0:#x},".format(x), end="")
print("\n };\n")
# By default every code point has Grapheme_Cluster_Break=Other.
all_code_points = ["Other"] * (1 + 0x10FFFF)
@ -167,7 +206,7 @@ print(" };\n")
# Tables for std::__unicode::_Grapheme_cluster_state
print(" // Values generated by contrib/unicode/gen_std_format_width.py,")
print(" // Values generated by contrib/unicode/gen_libstdcxx_unicode_data.py,")
print(" // from GraphemeBreakProperty.txt from the Unicode standard.");
print(" // Entries are (code_point << shift_bits) + property.")
print(" inline constexpr int __gcb_shift_bits = {:#x};".format(shift_bits))
@ -209,7 +248,7 @@ edges = find_edges(all_code_points)
incb_props = {None:0, "Consonant":1, "Extend":2}
print(" enum class _InCB { _Consonant = 1, _Extend = 2 };\n")
# Table for std::__unicode::__incb_property
print(" // Values generated by contrib/unicode/gen_std_format_width.py,")
print(" // Values generated by contrib/unicode/gen_libstdcxx_unicode_data.py,")
print(" // from DerivedCoreProperties.txt from the Unicode standard.");
print(" // Entries are (code_point << 2) + property.")
print(" inline constexpr uint32_t __incb_edges[] = {", end="")
@ -238,7 +277,7 @@ for line in open("emoji-data.txt", "r"):
edges = find_edges(all_code_points, False)
# Table for std::__unicode::__is_extended_pictographic
print(" // Table generated by contrib/unicode/gen_std_format_width.py,")
print(" // Table generated by contrib/unicode/gen_libstdcxx_unicode_data.py,")
print(" // from emoji-data.txt from the Unicode standard.");
print(" inline constexpr char32_t __xpicto_edges[] = {", end="")
for i, e in enumerate(edges):

View file

@ -57,21 +57,7 @@ namespace chrono
/// @cond undocumented
namespace __detail
{
// STATICALLY-WIDEN, see C++20 [time.general]
// It doesn't matter for format strings (which can only be char or wchar_t)
// but this returns the narrow string for anything that isn't wchar_t. This
// is done because const char* can be inserted into any ostream type, and
// will be widened at runtime if necessary.
template<typename _CharT>
consteval auto
_Widen(const char* __narrow, const wchar_t* __wide)
{
if constexpr (is_same_v<_CharT, wchar_t>)
return __wide;
else
return __narrow;
}
#define _GLIBCXX_WIDEN_(C, S) ::std::chrono::__detail::_Widen<C>(S, L##S)
#define _GLIBCXX_WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S)
#define _GLIBCXX_WIDEN(S) _GLIBCXX_WIDEN_(_CharT, S)
template<typename _Period, typename _CharT>

View file

@ -33,7 +33,7 @@
# error "Version mismatch for Unicode static data"
#endif
// Table generated by contrib/unicode/gen_std_format_width.py,
// Table generated by contrib/unicode/gen_libstdcxx_unicode_data.py,
// from EastAsianWidth.txt from the Unicode standard.
inline constexpr char32_t __width_edges[] = {
0x1100, 0x1160, 0x231a, 0x231c, 0x2329, 0x232b, 0x23e9, 0x23ed,
@ -64,6 +64,258 @@
0x1faf0, 0x1faf9, 0x20000, 0x2fffe, 0x30000, 0x3fffe,
};
// Values generated by contrib/unicode/gen_libstdcxx_unicode_data.py,
// from DerivedGeneralCategory.txt from the Unicode standard.
// Entries are (code_point << 1) + escape.
inline constexpr uint32_t __escape_edges[] = {
0x1, 0x42, 0xff, 0x142, 0x15b, 0x15c,
0x6f1, 0x6f4, 0x701, 0x708, 0x717, 0x718,
0x71b, 0x71c, 0x745, 0x746, 0xa61, 0xa62,
0xaaf, 0xab2, 0xb17, 0xb1a, 0xb21, 0xb22,
0xb91, 0xba0, 0xbd7, 0xbde, 0xbeb, 0xc0c,
0xc39, 0xc3a, 0xdbb, 0xdbc, 0xe1d, 0xe20,
0xe97, 0xe9a, 0xf65, 0xf80, 0xff7, 0xffa,
0x105d, 0x1060, 0x107f, 0x1080, 0x10b9, 0x10bc,
0x10bf, 0x10c0, 0x10d7, 0x10e0, 0x111f, 0x112e,
0x11c5, 0x11c6, 0x1309, 0x130a, 0x131b, 0x131e,
0x1323, 0x1326, 0x1353, 0x1354, 0x1363, 0x1364,
0x1367, 0x136c, 0x1375, 0x1378, 0x138b, 0x138e,
0x1393, 0x1396, 0x139f, 0x13ae, 0x13b1, 0x13b8,
0x13bd, 0x13be, 0x13c9, 0x13cc, 0x13ff, 0x1402,
0x1409, 0x140a, 0x1417, 0x141e, 0x1423, 0x1426,
0x1453, 0x1454, 0x1463, 0x1464, 0x1469, 0x146a,
0x146f, 0x1470, 0x1475, 0x1478, 0x147b, 0x147c,
0x1487, 0x148e, 0x1493, 0x1496, 0x149d, 0x14a2,
0x14a5, 0x14b2, 0x14bb, 0x14bc, 0x14bf, 0x14cc,
0x14ef, 0x1502, 0x1509, 0x150a, 0x151d, 0x151e,
0x1525, 0x1526, 0x1553, 0x1554, 0x1563, 0x1564,
0x1569, 0x156a, 0x1575, 0x1578, 0x158d, 0x158e,
0x1595, 0x1596, 0x159d, 0x15a0, 0x15a3, 0x15c0,
0x15c9, 0x15cc, 0x15e5, 0x15f2, 0x1601, 0x1602,
0x1609, 0x160a, 0x161b, 0x161e, 0x1623, 0x1626,
0x1653, 0x1654, 0x1663, 0x1664, 0x1669, 0x166a,
0x1675, 0x1678, 0x168b, 0x168e, 0x1693, 0x1696,
0x169d, 0x16aa, 0x16b1, 0x16b8, 0x16bd, 0x16be,
0x16c9, 0x16cc, 0x16f1, 0x1704, 0x1709, 0x170a,
0x1717, 0x171c, 0x1723, 0x1724, 0x172d, 0x1732,
0x1737, 0x1738, 0x173b, 0x173c, 0x1741, 0x1746,
0x174b, 0x1750, 0x1757, 0x175c, 0x1775, 0x177c,
0x1787, 0x178c, 0x1793, 0x1794, 0x179d, 0x17a0,
0x17a3, 0x17ae, 0x17b1, 0x17cc, 0x17f7, 0x1800,
0x181b, 0x181c, 0x1823, 0x1824, 0x1853, 0x1854,
0x1875, 0x1878, 0x188b, 0x188c, 0x1893, 0x1894,
0x189d, 0x18aa, 0x18af, 0x18b0, 0x18b7, 0x18ba,
0x18bd, 0x18c0, 0x18c9, 0x18cc, 0x18e1, 0x18ee,
0x191b, 0x191c, 0x1923, 0x1924, 0x1953, 0x1954,
0x1969, 0x196a, 0x1975, 0x1978, 0x198b, 0x198c,
0x1993, 0x1994, 0x199d, 0x19aa, 0x19af, 0x19ba,
0x19bf, 0x19c0, 0x19c9, 0x19cc, 0x19e1, 0x19e2,
0x19e9, 0x1a00, 0x1a1b, 0x1a1c, 0x1a23, 0x1a24,
0x1a8b, 0x1a8c, 0x1a93, 0x1a94, 0x1aa1, 0x1aa8,
0x1ac9, 0x1acc, 0x1b01, 0x1b02, 0x1b09, 0x1b0a,
0x1b2f, 0x1b34, 0x1b65, 0x1b66, 0x1b79, 0x1b7a,
0x1b7d, 0x1b80, 0x1b8f, 0x1b94, 0x1b97, 0x1b9e,
0x1bab, 0x1bac, 0x1baf, 0x1bb0, 0x1bc1, 0x1bcc,
0x1be1, 0x1be4, 0x1beb, 0x1c02, 0x1c77, 0x1c7e,
0x1cb9, 0x1d02, 0x1d07, 0x1d08, 0x1d0b, 0x1d0c,
0x1d17, 0x1d18, 0x1d49, 0x1d4a, 0x1d4d, 0x1d4e,
0x1d7d, 0x1d80, 0x1d8b, 0x1d8c, 0x1d8f, 0x1d90,
0x1d9f, 0x1da0, 0x1db5, 0x1db8, 0x1dc1, 0x1e00,
0x1e91, 0x1e92, 0x1edb, 0x1ee2, 0x1f31, 0x1f32,
0x1f7b, 0x1f7c, 0x1f9b, 0x1f9c, 0x1fb7, 0x2000,
0x218d, 0x218e, 0x2191, 0x219a, 0x219d, 0x21a0,
0x2493, 0x2494, 0x249d, 0x24a0, 0x24af, 0x24b0,
0x24b3, 0x24b4, 0x24bd, 0x24c0, 0x2513, 0x2514,
0x251d, 0x2520, 0x2563, 0x2564, 0x256d, 0x2570,
0x257f, 0x2580, 0x2583, 0x2584, 0x258d, 0x2590,
0x25af, 0x25b0, 0x2623, 0x2624, 0x262d, 0x2630,
0x26b7, 0x26ba, 0x26fb, 0x2700, 0x2735, 0x2740,
0x27ed, 0x27f0, 0x27fd, 0x2800, 0x2d01, 0x2d02,
0x2d3b, 0x2d40, 0x2df3, 0x2e00, 0x2e2d, 0x2e3e,
0x2e6f, 0x2e80, 0x2ea9, 0x2ec0, 0x2edb, 0x2edc,
0x2ee3, 0x2ee4, 0x2ee9, 0x2f00, 0x2fbd, 0x2fc0,
0x2fd5, 0x2fe0, 0x2ff5, 0x3000, 0x301d, 0x301e,
0x3035, 0x3040, 0x30f3, 0x3100, 0x3157, 0x3160,
0x31ed, 0x3200, 0x323f, 0x3240, 0x3259, 0x3260,
0x3279, 0x3280, 0x3283, 0x3288, 0x32dd, 0x32e0,
0x32eb, 0x3300, 0x3359, 0x3360, 0x3395, 0x33a0,
0x33b7, 0x33bc, 0x3439, 0x343c, 0x34bf, 0x34c0,
0x34fb, 0x34fe, 0x3515, 0x3520, 0x3535, 0x3540,
0x355d, 0x3560, 0x359f, 0x3600, 0x369b, 0x369c,
0x37e9, 0x37f8, 0x3871, 0x3876, 0x3895, 0x389a,
0x3917, 0x3920, 0x3977, 0x397a, 0x3991, 0x39a0,
0x39f7, 0x3a00, 0x3e2d, 0x3e30, 0x3e3d, 0x3e40,
0x3e8d, 0x3e90, 0x3e9d, 0x3ea0, 0x3eb1, 0x3eb2,
0x3eb5, 0x3eb6, 0x3eb9, 0x3eba, 0x3ebd, 0x3ebe,
0x3efd, 0x3f00, 0x3f6b, 0x3f6c, 0x3f8b, 0x3f8c,
0x3fa9, 0x3fac, 0x3fb9, 0x3fba, 0x3fe1, 0x3fe4,
0x3feb, 0x3fec, 0x3fff, 0x4020, 0x4051, 0x4060,
0x40bf, 0x40e0, 0x40e5, 0x40e8, 0x411f, 0x4120,
0x413b, 0x4140, 0x4183, 0x41a0, 0x41e3, 0x4200,
0x4319, 0x4320, 0x4855, 0x4880, 0x4897, 0x48c0,
0x56e9, 0x56ec, 0x572d, 0x572e, 0x59e9, 0x59f2,
0x5a4d, 0x5a4e, 0x5a51, 0x5a5a, 0x5a5d, 0x5a60,
0x5ad1, 0x5ade, 0x5ae3, 0x5afe, 0x5b2f, 0x5b40,
0x5b4f, 0x5b50, 0x5b5f, 0x5b60, 0x5b6f, 0x5b70,
0x5b7f, 0x5b80, 0x5b8f, 0x5b90, 0x5b9f, 0x5ba0,
0x5baf, 0x5bb0, 0x5bbf, 0x5bc0, 0x5cbd, 0x5d00,
0x5d35, 0x5d36, 0x5de9, 0x5e00, 0x5fad, 0x5fe0,
0x6001, 0x6002, 0x6081, 0x6082, 0x612f, 0x6132,
0x6201, 0x620a, 0x6261, 0x6262, 0x631f, 0x6320,
0x63cd, 0x63de, 0x643f, 0x6440, 0x1491b, 0x14920,
0x1498f, 0x149a0, 0x14c59, 0x14c80, 0x14df1, 0x14e00,
0x14f9d, 0x14fa0, 0x14fa5, 0x14fa6, 0x14fa9, 0x14faa,
0x14fbb, 0x14fe4, 0x1505b, 0x15060, 0x15075, 0x15080,
0x150f1, 0x15100, 0x1518d, 0x1519c, 0x151b5, 0x151c0,
0x152a9, 0x152be, 0x152fb, 0x15300, 0x1539d, 0x1539e,
0x153b5, 0x153bc, 0x153ff, 0x15400, 0x1546f, 0x15480,
0x1549d, 0x154a0, 0x154b5, 0x154b8, 0x15587, 0x155b6,
0x155ef, 0x15602, 0x1560f, 0x15612, 0x1561f, 0x15622,
0x1562f, 0x15640, 0x1564f, 0x15650, 0x1565f, 0x15660,
0x156d9, 0x156e0, 0x157dd, 0x157e0, 0x157f5, 0x15800,
0x1af49, 0x1af60, 0x1af8f, 0x1af96, 0x1aff9, 0x1f200,
0x1f4dd, 0x1f4e0, 0x1f5b5, 0x1f600, 0x1f60f, 0x1f626,
0x1f631, 0x1f63a, 0x1f66f, 0x1f670, 0x1f67b, 0x1f67c,
0x1f67f, 0x1f680, 0x1f685, 0x1f686, 0x1f68b, 0x1f68c,
0x1f787, 0x1f7a6, 0x1fb21, 0x1fb24, 0x1fb91, 0x1fb9e,
0x1fba1, 0x1fbe0, 0x1fc35, 0x1fc40, 0x1fca7, 0x1fca8,
0x1fccf, 0x1fcd0, 0x1fcd9, 0x1fce0, 0x1fceb, 0x1fcec,
0x1fdfb, 0x1fe02, 0x1ff7f, 0x1ff84, 0x1ff91, 0x1ff94,
0x1ffa1, 0x1ffa4, 0x1ffb1, 0x1ffb4, 0x1ffbb, 0x1ffc0,
0x1ffcf, 0x1ffd0, 0x1ffdf, 0x1fff8, 0x1fffd, 0x20000,
0x20019, 0x2001a, 0x2004f, 0x20050, 0x20077, 0x20078,
0x2007d, 0x2007e, 0x2009d, 0x200a0, 0x200bd, 0x20100,
0x201f7, 0x20200, 0x20207, 0x2020e, 0x20269, 0x2026e,
0x2031f, 0x20320, 0x2033b, 0x20340, 0x20343, 0x203a0,
0x203fd, 0x20500, 0x2053b, 0x20540, 0x205a3, 0x205c0,
0x205f9, 0x20600, 0x20649, 0x2065a, 0x20697, 0x206a0,
0x206f7, 0x20700, 0x2073d, 0x2073e, 0x20789, 0x20790,
0x207ad, 0x20800, 0x2093d, 0x20940, 0x20955, 0x20960,
0x209a9, 0x209b0, 0x209f9, 0x20a00, 0x20a51, 0x20a60,
0x20ac9, 0x20ade, 0x20af7, 0x20af8, 0x20b17, 0x20b18,
0x20b27, 0x20b28, 0x20b2d, 0x20b2e, 0x20b45, 0x20b46,
0x20b65, 0x20b66, 0x20b75, 0x20b76, 0x20b7b, 0x20b80,
0x20be9, 0x20c00, 0x20e6f, 0x20e80, 0x20ead, 0x20ec0,
0x20ed1, 0x20f00, 0x20f0d, 0x20f0e, 0x20f63, 0x20f64,
0x20f77, 0x21000, 0x2100d, 0x21010, 0x21013, 0x21014,
0x2106d, 0x2106e, 0x21073, 0x21078, 0x2107b, 0x2107e,
0x210ad, 0x210ae, 0x2113f, 0x2114e, 0x21161, 0x211c0,
0x211e7, 0x211e8, 0x211ed, 0x211f6, 0x21239, 0x2123e,
0x21275, 0x2127e, 0x21281, 0x21300, 0x21371, 0x21378,
0x213a1, 0x213a4, 0x21409, 0x2140a, 0x2140f, 0x21418,
0x21429, 0x2142a, 0x21431, 0x21432, 0x2146d, 0x21470,
0x21477, 0x2147e, 0x21493, 0x214a0, 0x214b3, 0x214c0,
0x21541, 0x21580, 0x215cf, 0x215d6, 0x215ef, 0x21600,
0x2166d, 0x21672, 0x216ad, 0x216b0, 0x216e7, 0x216f0,
0x21725, 0x21732, 0x2173b, 0x21752, 0x21761, 0x21800,
0x21893, 0x21900, 0x21967, 0x21980, 0x219e7, 0x219f4,
0x21a51, 0x21a60, 0x21a75, 0x21a80, 0x21acd, 0x21ad2,
0x21b0d, 0x21b1c, 0x21b21, 0x21cc0, 0x21cff, 0x21d00,
0x21d55, 0x21d56, 0x21d5d, 0x21d60, 0x21d65, 0x21d84,
0x21d8b, 0x21df8, 0x21e51, 0x21e60, 0x21eb5, 0x21ee0,
0x21f15, 0x21f60, 0x21f99, 0x21fc0, 0x21fef, 0x22000,
0x2209d, 0x220a4, 0x220ed, 0x220fe, 0x2217b, 0x2217c,
0x22187, 0x221a0, 0x221d3, 0x221e0, 0x221f5, 0x22200,
0x2226b, 0x2226c, 0x22291, 0x222a0, 0x222ef, 0x22300,
0x223c1, 0x223c2, 0x223eb, 0x22400, 0x22425, 0x22426,
0x22485, 0x22500, 0x2250f, 0x22510, 0x22513, 0x22514,
0x2251d, 0x2251e, 0x2253d, 0x2253e, 0x22555, 0x22560,
0x225d7, 0x225e0, 0x225f5, 0x22600, 0x22609, 0x2260a,
0x2261b, 0x2261e, 0x22623, 0x22626, 0x22653, 0x22654,
0x22663, 0x22664, 0x22669, 0x2266a, 0x22675, 0x22676,
0x2268b, 0x2268e, 0x22693, 0x22696, 0x2269d, 0x226a0,
0x226a3, 0x226ae, 0x226b1, 0x226ba, 0x226c9, 0x226cc,
0x226db, 0x226e0, 0x226eb, 0x22700, 0x22715, 0x22716,
0x22719, 0x2271c, 0x2271f, 0x22720, 0x2276d, 0x2276e,
0x22783, 0x22784, 0x22787, 0x2278a, 0x2278d, 0x2278e,
0x22797, 0x22798, 0x227ad, 0x227ae, 0x227b3, 0x227c2,
0x227c7, 0x22800, 0x228b9, 0x228ba, 0x228c5, 0x22900,
0x22991, 0x229a0, 0x229b5, 0x22b00, 0x22b6d, 0x22b70,
0x22bbd, 0x22c00, 0x22c8b, 0x22ca0, 0x22cb5, 0x22cc0,
0x22cdb, 0x22d00, 0x22d75, 0x22d80, 0x22d95, 0x22da0,
0x22dc9, 0x22e00, 0x22e37, 0x22e3a, 0x22e59, 0x22e60,
0x22e8f, 0x23000, 0x23079, 0x23140, 0x231e7, 0x231fe,
0x2320f, 0x23212, 0x23215, 0x23218, 0x23229, 0x2322a,
0x2322f, 0x23230, 0x2326d, 0x2326e, 0x23273, 0x23276,
0x2328f, 0x232a0, 0x232b5, 0x23340, 0x23351, 0x23354,
0x233b1, 0x233b4, 0x233cb, 0x23400, 0x23491, 0x234a0,
0x23547, 0x23560, 0x235f3, 0x23600, 0x23615, 0x23780,
0x237c5, 0x237e0, 0x237f5, 0x23800, 0x23813, 0x23814,
0x2386f, 0x23870, 0x2388d, 0x238a0, 0x238db, 0x238e0,
0x23921, 0x23924, 0x23951, 0x23952, 0x2396f, 0x23a00,
0x23a0f, 0x23a10, 0x23a15, 0x23a16, 0x23a6f, 0x23a74,
0x23a77, 0x23a78, 0x23a7d, 0x23a7e, 0x23a91, 0x23aa0,
0x23ab5, 0x23ac0, 0x23acd, 0x23ace, 0x23ad3, 0x23ad4,
0x23b1f, 0x23b20, 0x23b25, 0x23b26, 0x23b33, 0x23b40,
0x23b55, 0x23dc0, 0x23df3, 0x23e00, 0x23e23, 0x23e24,
0x23e77, 0x23e7c, 0x23eb7, 0x23f60, 0x23f63, 0x23f80,
0x23fe5, 0x23ffe, 0x24735, 0x24800, 0x248df, 0x248e0,
0x248eb, 0x24900, 0x24a89, 0x25f20, 0x25fe7, 0x26000,
0x26861, 0x26880, 0x268ad, 0x268c0, 0x287f7, 0x28800,
0x28c8f, 0x2c200, 0x2c275, 0x2d000, 0x2d473, 0x2d480,
0x2d4bf, 0x2d4c0, 0x2d4d5, 0x2d4dc, 0x2d57f, 0x2d580,
0x2d595, 0x2d5a0, 0x2d5dd, 0x2d5e0, 0x2d5ed, 0x2d600,
0x2d68d, 0x2d6a0, 0x2d6b5, 0x2d6b6, 0x2d6c5, 0x2d6c6,
0x2d6f1, 0x2d6fa, 0x2d721, 0x2da80, 0x2daf5, 0x2dc80,
0x2dd37, 0x2de00, 0x2de97, 0x2de9e, 0x2df11, 0x2df1e,
0x2df41, 0x2dfc0, 0x2dfcb, 0x2dfe0, 0x2dfe5, 0x2e000,
0x30ff1, 0x31000, 0x319ad, 0x319fe, 0x31a13, 0x35fe0,
0x35fe9, 0x35fea, 0x35ff9, 0x35ffa, 0x35fff, 0x36000,
0x36247, 0x36264, 0x36267, 0x362a0, 0x362a7, 0x362aa,
0x362ad, 0x362c8, 0x362d1, 0x362e0, 0x365f9, 0x37800,
0x378d7, 0x378e0, 0x378fb, 0x37900, 0x37913, 0x37920,
0x37935, 0x37938, 0x37941, 0x39800, 0x399f5, 0x39a00,
0x39d69, 0x39e00, 0x39e5d, 0x39e60, 0x39e8f, 0x39ea0,
0x39f89, 0x3a000, 0x3a1ed, 0x3a200, 0x3a24f, 0x3a252,
0x3a2e7, 0x3a2f6, 0x3a3d7, 0x3a400, 0x3a48d, 0x3a580,
0x3a5a9, 0x3a5c0, 0x3a5e9, 0x3a600, 0x3a6af, 0x3a6c0,
0x3a6f3, 0x3a800, 0x3a8ab, 0x3a8ac, 0x3a93b, 0x3a93c,
0x3a941, 0x3a944, 0x3a947, 0x3a94a, 0x3a94f, 0x3a952,
0x3a95b, 0x3a95c, 0x3a975, 0x3a976, 0x3a979, 0x3a97a,
0x3a989, 0x3a98a, 0x3aa0d, 0x3aa0e, 0x3aa17, 0x3aa1a,
0x3aa2b, 0x3aa2c, 0x3aa3b, 0x3aa3c, 0x3aa75, 0x3aa76,
0x3aa7f, 0x3aa80, 0x3aa8b, 0x3aa8c, 0x3aa8f, 0x3aa94,
0x3aaa3, 0x3aaa4, 0x3ad4d, 0x3ad50, 0x3af99, 0x3af9c,
0x3b519, 0x3b536, 0x3b541, 0x3b542, 0x3b561, 0x3be00,
0x3be3f, 0x3be4a, 0x3be57, 0x3c000, 0x3c00f, 0x3c010,
0x3c033, 0x3c036, 0x3c045, 0x3c046, 0x3c04b, 0x3c04c,
0x3c057, 0x3c060, 0x3c0dd, 0x3c11e, 0x3c121, 0x3c200,
0x3c25b, 0x3c260, 0x3c27d, 0x3c280, 0x3c295, 0x3c29c,
0x3c2a1, 0x3c520, 0x3c55f, 0x3c580, 0x3c5f5, 0x3c5fe,
0x3c601, 0x3c9a0, 0x3c9f5, 0x3cba0, 0x3cbf7, 0x3cbfe,
0x3cc01, 0x3cfc0, 0x3cfcf, 0x3cfd0, 0x3cfd9, 0x3cfda,
0x3cfdf, 0x3cfe0, 0x3cfff, 0x3d000, 0x3d18b, 0x3d18e,
0x3d1af, 0x3d200, 0x3d299, 0x3d2a0, 0x3d2b5, 0x3d2bc,
0x3d2c1, 0x3d8e2, 0x3d96b, 0x3da02, 0x3da7d, 0x3dc00,
0x3dc09, 0x3dc0a, 0x3dc41, 0x3dc42, 0x3dc47, 0x3dc48,
0x3dc4b, 0x3dc4e, 0x3dc51, 0x3dc52, 0x3dc67, 0x3dc68,
0x3dc71, 0x3dc72, 0x3dc75, 0x3dc76, 0x3dc79, 0x3dc84,
0x3dc87, 0x3dc8e, 0x3dc91, 0x3dc92, 0x3dc95, 0x3dc96,
0x3dc99, 0x3dc9a, 0x3dca1, 0x3dca2, 0x3dca7, 0x3dca8,
0x3dcab, 0x3dcae, 0x3dcb1, 0x3dcb2, 0x3dcb5, 0x3dcb6,
0x3dcb9, 0x3dcba, 0x3dcbd, 0x3dcbe, 0x3dcc1, 0x3dcc2,
0x3dcc7, 0x3dcc8, 0x3dccb, 0x3dcce, 0x3dcd7, 0x3dcd8,
0x3dce7, 0x3dce8, 0x3dcf1, 0x3dcf2, 0x3dcfb, 0x3dcfc,
0x3dcff, 0x3dd00, 0x3dd15, 0x3dd16, 0x3dd39, 0x3dd42,
0x3dd49, 0x3dd4a, 0x3dd55, 0x3dd56, 0x3dd79, 0x3dde0,
0x3dde5, 0x3e000, 0x3e059, 0x3e060, 0x3e129, 0x3e140,
0x3e15f, 0x3e162, 0x3e181, 0x3e182, 0x3e1a1, 0x3e1a2,
0x3e1ed, 0x3e200, 0x3e35d, 0x3e3cc, 0x3e407, 0x3e420,
0x3e479, 0x3e480, 0x3e493, 0x3e4a0, 0x3e4a5, 0x3e4c0,
0x3e4cd, 0x3e600, 0x3edb1, 0x3edb8, 0x3eddb, 0x3ede0,
0x3edfb, 0x3ee00, 0x3eeef, 0x3eef6, 0x3efb5, 0x3efc0,
0x3efd9, 0x3efe0, 0x3efe3, 0x3f000, 0x3f019, 0x3f020,
0x3f091, 0x3f0a0, 0x3f0b5, 0x3f0c0, 0x3f111, 0x3f120,
0x3f15d, 0x3f160, 0x3f179, 0x3f180, 0x3f185, 0x3f200,
0x3f4a9, 0x3f4c0, 0x3f4dd, 0x3f4e0, 0x3f4fb, 0x3f500,
0x3f515, 0x3f51e, 0x3f58f, 0x3f59c, 0x3f5bb, 0x3f5be,
0x3f5d5, 0x3f5e0, 0x3f5f3, 0x3f600, 0x3f727, 0x3f728,
0x3f7f5, 0x40000, 0x54dc1, 0x54e00, 0x56e75, 0x56e80,
0x5703d, 0x57040, 0x59d45, 0x59d60, 0x5d7c3, 0x5d7e0,
0x5dcbd, 0x5f000, 0x5f43d, 0x60000, 0x62697, 0x626a0,
0x64761, 0x1c0200, 0x1c03e1,
};
enum class _Gcb_property {
_Gcb_Other = 0,
_Gcb_Control = 1,
@ -81,7 +333,7 @@
_Gcb_Regional_Indicator = 13,
};
// Values generated by contrib/unicode/gen_std_format_width.py,
// Values generated by contrib/unicode/gen_libstdcxx_unicode_data.py,
// from GraphemeBreakProperty.txt from the Unicode standard.
// Entries are (code_point << shift_bits) + property.
inline constexpr int __gcb_shift_bits = 0x4;
@ -381,7 +633,7 @@
enum class _InCB { _Consonant = 1, _Extend = 2 };
// Values generated by contrib/unicode/gen_std_format_width.py,
// Values generated by contrib/unicode/gen_libstdcxx_unicode_data.py,
// from DerivedCoreProperties.txt from the Unicode standard.
// Entries are (code_point << 2) + property.
inline constexpr uint32_t __incb_edges[] = {
@ -519,7 +771,7 @@
0x380082, 0x380200, 0x380402, 0x3807c0,
};
// Table generated by contrib/unicode/gen_std_format_width.py,
// Table generated by contrib/unicode/gen_libstdcxx_unicode_data.py,
// from emoji-data.txt from the Unicode standard.
inline constexpr char32_t __xpicto_edges[] = {
0xa9, 0xaa, 0xae, 0xaf, 0x203c, 0x203d, 0x2049, 0x204a,

View file

@ -150,6 +150,11 @@ namespace __unicode
base() const requires forward_iterator<_Iter>
{ return _M_curr(); }
// Returns _M_to_increment.
// __format::__write_escaped_unicode pairs this value with base() to build a
// view of the code units belonging to the current code point, i.e. it is
// used as a code-unit count starting at base().
// NOTE(review): presumably this is the number of underlying code units the
// current code point was decoded from -- confirm against the places where
// _M_to_increment is updated.
[[nodiscard]]
constexpr iter_difference_t<_Iter>
_M_units() const requires forward_iterator<_Iter>
{ return _M_to_increment; }
[[nodiscard]]
constexpr value_type
operator*() const { return _M_buf[_M_buf_index]; }
@ -609,6 +614,18 @@ inline namespace __v16_0_0
return (__p - __width_edges) % 2 + 1;
}
// @pre c <= 0x10FFFF
// Looks up __c in __escape_edges and reports the flag stored in the low
// bit of the edge that covers it.
constexpr bool
__should_escape_category(char32_t __c) noexcept
{
  // Entries are (code_point << 1) + escape, in ascending order.  The
  // smallest entry that can belong to the code point after __c is
  // (__c << 1) + 2, so the first entry not less than that key is the
  // first edge past __c ...
  const auto __next_key = (__c << 1u) + 2;
  auto* const __past = std::lower_bound(__escape_edges,
                                        std::end(__escape_edges),
                                        __next_key);
  // ... and the entry just before it is the edge whose range contains __c.
  return (__past[-1] & uint32_t(0x01)) != 0;
}
// @pre c <= 0x10FFFF
constexpr _Gcb_property
__grapheme_cluster_break_property(char32_t __c) noexcept

View file

@ -80,8 +80,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
/// @cond undocumented
namespace __format
{
// Type-erased character sink.
// STATICALLY-WIDEN, see C++20 [time.general]
// Compile-time choice between two spellings of the same literal: the wide
// one when _CharT is wchar_t, the narrow one for every other type.
// Format strings can only be char or wchar_t, so the fallback makes no
// difference there; the narrow string is returned for anything else because
// a const char* can be inserted into any ostream type and will be widened
// at runtime if necessary.
template<typename _CharT>
  consteval auto
  _Widen(const char* __narrow, const wchar_t* __wide)
  {
    if constexpr (!is_same_v<_CharT, wchar_t>)
      return __narrow;
    else
      return __wide;
  }
#define _GLIBCXX_WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S)
#define _GLIBCXX_WIDEN(S) _GLIBCXX_WIDEN_(_CharT, S)
// Type-erased character sinks.
template<typename _CharT> class _Sink;
template<typename _CharT> class _Fixedbuf_sink;
template<typename _Seq> class _Seq_sink;
template<typename _CharT, typename _Alloc = allocator<_CharT>>
using _Str_sink
= _Seq_sink<basic_string<_CharT, char_traits<_CharT>, _Alloc>>;
// template<typename _CharT, typename _Alloc = allocator<_CharT>>
// using _Vec_sink = _Seq_sink<vector<_CharT, _Alloc>>;
// Output iterator that writes to a type-erase character sink.
template<typename _CharT>
class _Sink_iter;
@ -848,6 +875,286 @@ namespace __format
__spec._M_fill);
}
// Delimiter written before and after an escaped string or character.
// Each value is the index, within the string returned by
// _Escapes::_S_all(), of the delimiter character itself;
// _Escapes::_S_term() performs that lookup.
enum class _Term_char : unsigned char {
_Tc_quote = 12, // offset of the double quote (start of _Escapes::_S_quote())
_Tc_apos = 15 // offset of the apostrophe (start of _Escapes::_S_apos())
};
// Characters that have a dedicated escape sequence, plus the prefixes used
// for numeric escapes, all stored in one literal so that every accessor is
// a substr() of the same string.
//
// Layout of _S_all() (offset: contents):
//    0..2   TAB, then its escape text (backslash, 't')
//    3..5   LF, then (backslash, 'n')
//    6..8   CR, then (backslash, 'r')
//    9..11  backslash, then (backslash, backslash)
//   12..14  double quote, then (backslash, double quote)
//   15..17  apostrophe, then (backslash, apostrophe)
//   18..19  (backslash, 'u'), returned by _S_u()
//   20..21  (backslash, 'x'), returned by _S_x()
//
// For the three-character entries, element [0] is the raw character and
// substr(1, 2) is the text written in its place.
template<typename _CharT>
struct _Escapes
{
using _Str_view = basic_string_view<_CharT>;
// The whole table, in the character type _CharT.
static consteval
_Str_view _S_all()
{ return _GLIBCXX_WIDEN("\t\\t\n\\n\r\\r\\\\\\\"\\\"'\\'\\u\\x"); }
// The delimiter character for __term; the enumerator value is used
// directly as an index into _S_all().
static constexpr
_CharT _S_term(_Term_char __term)
{ return _S_all()[static_cast<unsigned char>(__term)]; }
// Entry for TAB (offsets 0..2).
static consteval
_Str_view _S_tab()
{ return _S_all().substr(0, 3); }
// Entry for LF (offsets 3..5).
static consteval
_Str_view _S_newline()
{ return _S_all().substr(3, 3); }
// Entry for CR (offsets 6..8).
static consteval
_Str_view _S_return()
{ return _S_all().substr(6, 3); }
// Entry for backslash (offsets 9..11).
static consteval
_Str_view _S_bslash()
{ return _S_all().substr(9, 3); }
// Entry for double quote (offsets 12..14).
static consteval
_Str_view _S_quote()
{ return _S_all().substr(12, 3); }
// Entry for apostrophe (offsets 15..17).
static consteval
_Str_view _S_apos()
{ return _S_all().substr(15, 3); }
// Two-character prefix (backslash, 'u') at offsets 18..19.
static consteval
_Str_view _S_u()
{ return _S_all().substr(18, 2); }
// Two-character prefix (backslash, 'x') at offsets 20..21.
static consteval
_Str_view _S_x()
{ return _S_all().substr(20, 2); }
};
// Punctuation used when writing escape sequences, stored as one literal in
// the character type _CharT.  At present it holds only the brace pair:
// _S_braces()[0] is '{' and _S_braces()[1] is '}'.
template<typename _CharT>
struct _Separators
{
using _Str_view = basic_string_view<_CharT>;
// The whole table.
static consteval
_Str_view _S_all()
{ return _GLIBCXX_WIDEN("{}"); }
// The opening and closing brace (offsets 0..1 of _S_all()).
static consteval
_Str_view _S_braces()
{ return _S_all().substr(0, 2); }
};
// Decide whether the code unit __c, interpreted as ASCII, has to be
// written in escaped form when the surrounding delimiter is __term.
template<typename _CharT>
  constexpr bool __should_escape_ascii(_CharT __c, _Term_char __term)
  {
    using _Esc = _Escapes<_CharT>;

    // Tab, newline, carriage return and backslash are always escaped.
    if (__c == _Esc::_S_tab()[0] || __c == _Esc::_S_newline()[0]
        || __c == _Esc::_S_return()[0] || __c == _Esc::_S_bslash()[0])
      return true;

    // A quote character is escaped only when it is the delimiter in use.
    if (__c == _Esc::_S_quote()[0])
      return __term == _Term_char::_Tc_quote;
    if (__c == _Esc::_S_apos()[0])
      return __term == _Term_char::_Tc_apos;

    // Everything else: escape the range [0, 0x20) and 0x7f.
    const bool __below_space = __c >= 0 && __c < 0x20;
    return __below_space || __c == 0x7f;
  }
// @pre __c <= 0x10FFFF
// Decide whether code point __c must be escaped.  __prev_esc is supplied
// by the caller and only matters for the Grapheme_Cluster_Break check.
constexpr bool __should_escape_unicode(char32_t __c, bool __prev_esc)
{
  // The category table says "escape": do so for everything but U+0020.
  if (__unicode::__should_escape_category(__c))
    return __c != U' ';

  // Otherwise escape only when __prev_esc is set and __c has
  // Grapheme_Cluster_Break=Extend; the property is not looked up at all
  // when __prev_esc is false.
  return __prev_esc
    && (__unicode::__grapheme_cluster_break_property(__c)
        == __unicode::_Gcb_property::_Gcb_Extend);
}
using uint_least32_t = __UINT_LEAST32_TYPE__;
// Write __val as a braced numeric escape: __prefix, then '{', then the
// digits of __val in base 16, then '}'.
//   __out     output iterator to write through; the advanced iterator is
//             returned.
//   __val     the number to print (callers in this file pass either a code
//             point or a single code unit).
//   __prefix  text emitted before the opening brace (callers in this file
//             pass _Escapes::_S_u() or _Escapes::_S_x()).
template<typename _Out, typename _CharT>
_Out
__write_escape_seq(_Out __out, uint_least32_t __val,
basic_string_view<_CharT> __prefix)
{
using _Str_view = basic_string_view<_CharT>;
// Eight characters is enough for every base-16 digit of a 32-bit value.
// NOTE(review): assumes uint_least32_t is exactly 32 bits wide -- confirm.
constexpr size_t __max = 8;
char __buf[__max];
// The digits are always produced as narrow characters first; __narrow
// views only the part of __buf up to the pointer __to_chars_i returns.
const string_view __narrow(
__buf,
std::__to_chars_i<uint_least32_t>(__buf, __buf + __max, __val, 16).ptr);
__out = __format::__write(__out, __prefix);
*__out = _Separators<_CharT>::_S_braces()[0];
++__out;
if constexpr (is_same_v<char, _CharT>)
__out = __format::__write(__out, __narrow);
#ifdef _GLIBCXX_USE_WCHAR_T
else
{
// Non-char output: fill a _CharT buffer from the narrow digits and write
// that instead (__to_wstring_numeric presumably performs a plain widening
// copy -- confirm).
_CharT __wbuf[__max];
const size_t __n = __narrow.size();
std::__to_wstring_numeric(__narrow.data(), __n, __wbuf);
__out = __format::__write(__out, _Str_view(__wbuf, __n));
}
#endif
*__out = _Separators<_CharT>::_S_braces()[1];
return ++__out;
}
template<typename _Out, typename _CharT>
_Out
__write_escaped_char(_Out __out, _CharT __c)
{
using _UChar = make_unsigned_t<_CharT>;
using _Esc = _Escapes<_CharT>;
switch (__c)
{
case _Esc::_S_tab()[0]:
return __format::__write(__out, _Esc::_S_tab().substr(1, 2));
case _Esc::_S_newline()[0]:
return __format::__write(__out, _Esc::_S_newline().substr(1, 2));
case _Esc::_S_return()[0]:
return __format::__write(__out, _Esc::_S_return().substr(1, 2));
case _Esc::_S_bslash()[0]:
return __format::__write(__out, _Esc::_S_bslash().substr(1, 2));
case _Esc::_S_quote()[0]:
return __format::__write(__out, _Esc::_S_quote().substr(1, 2));
case _Esc::_S_apos()[0]:
return __format::__write(__out, _Esc::_S_apos().substr(1, 2));
default:
return __format::__write_escape_seq(__out,
static_cast<_UChar>(__c),
_Esc::_S_u());
}
}
// Write __str to __out, replacing every code unit for which
// __should_escape_ascii(unit, __term) is true by its escaped form.
// Runs of units that need no escaping are written unchanged, one
// __write call per run.  Returns the advanced iterator.
template<typename _CharT, typename _Out>
  _Out
  __write_escaped_ascii(_Out __out,
                        basic_string_view<_CharT> __str,
                        _Term_char __term)
  {
    using _Str_view = basic_string_view<_CharT>;

    auto __run_begin = __str.begin();
    const auto __str_end = __str.end();
    for (;;)
      {
        // Extend the current run up to the next unit that must be escaped.
        // assume anything outside ASCII is printable
        auto __run_end = __run_begin;
        while (__run_end != __str_end
               && !__format::__should_escape_ascii(*__run_end, __term))
          ++__run_end;

        // Flush the run, if it is not empty.
        if (__run_end != __run_begin)
          __out = __format::__write(__out, _Str_view(__run_begin, __run_end));

        // Nothing left: done.
        if (__run_end == __str_end)
          return __out;

        // __run_end designates a unit that must be escaped; write it and
        // start the next run right after it.
        __out = __format::__write_escaped_char(__out, *__run_end);
        __run_begin = ++__run_end;
      }
  }
// Write __str to __out for a Unicode literal encoding, escaping what needs
// escaping.  The string is walked code point by code point through a
// _Utf_view; runs that need no escaping are copied through as their
// original code units.  A code point is escaped in one of three ways:
//   - values up to 0x7f that __should_escape_ascii flags are written by
//     __write_escaped_char;
//   - code points that __should_escape_unicode flags are written as a
//     braced numeric escape of the code point with the _S_u() prefix;
//   - a code point equal to U+FFFD whose underlying code units are NOT the
//     encoding of U+FFFD has each of those code units written as a braced
//     numeric escape with the _S_x() prefix.
//     NOTE(review): presumably this is how ill-formed input shows up, with
//     _Utf_view substituting U+FFFD -- confirm in bits/unicode.h.
// Returns the advanced iterator.
template<typename _CharT, typename _Out>
_Out
__write_escaped_unicode(_Out __out,
basic_string_view<_CharT> __str,
_Term_char __term)
{
using _Str_view = basic_string_view<_CharT>;
using _UChar = make_unsigned_t<_CharT>;
using _Esc = _Escapes<_CharT>;
static constexpr char32_t __replace = U'\uFFFD';
// Code units that spell U+FFFD in the string's own encoding, used to tell
// a U+FFFD really present in the input from a substituted one.
static constexpr _Str_view __replace_rep = []
{
// N.B. "\uFFFD" is ill-formed if encoding is not unicode.
if constexpr (is_same_v<char, _CharT>)
return "\xEF\xBF\xBD";
else
return L"\xFFFD";
}();
__unicode::_Utf_view<char32_t, _Str_view> __v(std::move(__str));
auto __first = __v.begin();
auto const __last = __v.end();
// Starts out true, so the first code point is tested as if it followed
// an escaped one.
bool __prev_esc = true;
while (__first != __last)
{
// Which escaping rule matched at the position where the scan below
// stopped; set as a side effect of __should_escape.
bool __esc_ascii = false;
bool __esc_unicode = false;
bool __esc_replace = false;
auto __should_escape = [&](auto const& __it)
{
// Values up to 0x7f use the ASCII rules, applied to the underlying
// code unit.
if (*__it <= 0x7f)
return __esc_ascii
= __format::__should_escape_ascii(*__it.base(), __term);
if (__format::__should_escape_unicode(*__it, __prev_esc))
return __esc_unicode = true;
if (*__it == __replace)
{
// Escape only if the code units behind this U+FFFD differ from the
// real encoding of U+FFFD.
_Str_view __units(__it.base(), __it._M_units());
return __esc_replace = (__units != __replace_rep);
}
return false;
};
// Scan forward over code points that can be written unchanged.
auto __print = __first;
while (__print != __last && !__should_escape(__print))
{
__prev_esc = false;
++__print;
}
// Copy the unescaped run through as its original code units.
if (__print != __first)
__out = __format::__write(__out, _Str_view(__first.base(), __print.base()));
if (__print == __last)
return __out;
// __print designates a code point that must be escaped.
__first = __print;
if (__esc_ascii)
__out = __format::__write_escaped_char(__out, *__first.base());
else if (__esc_unicode)
__out = __format::__write_escape_seq(__out, *__first, _Esc::_S_u());
else // __esc_replace
// One numeric escape per underlying code unit.
for (_CharT __c : _Str_view(__first.base(), __first._M_units()))
__out = __format::__write_escape_seq(__out,
static_cast<_UChar>(__c),
_Esc::_S_x());
__prev_esc = true;
++__first;
}
return __out;
}
// Write __str in its escaped form, enclosed in the delimiter selected by
// __term, and return the advanced iterator.  The escaping routine is
// chosen at compile time from the literal encoding of _CharT.
template<typename _CharT, typename _Out>
  _Out
  __write_escaped(_Out __out, basic_string_view<_CharT> __str,
                  _Term_char __term)
  {
    // The same delimiter character opens and closes the output.
    const _CharT __delim = _Escapes<_CharT>::_S_term(__term);

    *__out = __delim;
    ++__out;

    if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
      __out = __format::__write_escaped_unicode(__out, __str, __term);
    else if constexpr (is_same_v<char, _CharT>
                       && __unicode::__literal_encoding_is_extended_ascii())
      __out = __format::__write_escaped_ascii(__out, __str, __term);
    else
      // TODO Handle non-ascii extended encoding
      __out = __format::__write_escaped_ascii(__out, __str, __term);

    *__out = __delim;
    return ++__out;
  }
// A lightweight optional<locale>.
struct _Optional_locale
{
@ -961,7 +1268,7 @@ namespace __format
if (*__first == 's')
++__first;
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
else if (*__first == '?')
{
__spec._M_type = _Pres_esc;
@ -980,43 +1287,71 @@ namespace __format
format(basic_string_view<_CharT> __s,
basic_format_context<_Out, _CharT>& __fc) const
{
if (_M_spec._M_type == _Pres_esc)
constexpr auto __term = __format::_Term_char::_Tc_quote;
const auto __write_direct = [&]
{
// TODO: C++23 escaped string presentation
}
if (_M_spec._M_type == _Pres_esc)
return __format::__write_escaped(__fc.out(), __s, __term);
else
return __format::__write(__fc.out(), __s);
};
if (_M_spec._M_width_kind == _WP_none
&& _M_spec._M_prec_kind == _WP_none)
return __format::__write(__fc.out(), __s);
return __write_direct();
size_t __estimated_width;
if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
{
if (_M_spec._M_prec_kind != _WP_none)
{
size_t __prec = _M_spec._M_get_precision(__fc);
__estimated_width = __unicode::__truncate(__s, __prec);
}
else
__estimated_width = __unicode::__field_width(__s);
}
else
{
__s = __s.substr(0, _M_spec._M_get_precision(__fc));
__estimated_width = __s.size();
}
const size_t __prec =
_M_spec._M_prec_kind != _WP_none
? _M_spec._M_get_precision(__fc)
: basic_string_view<_CharT>::npos;
return __format::__write_padded_as_spec(__s, __estimated_width,
const size_t __estimated_width = _S_trunc(__s, __prec);
// N.B. Escaping only increases width
if (_M_spec._M_get_width(__fc) <= __estimated_width
&& _M_spec._M_prec_kind == _WP_none)
return __write_direct();
if (_M_spec._M_type != _Pres_esc)
return __format::__write_padded_as_spec(__s, __estimated_width,
__fc, _M_spec);
__format::_Str_sink<_CharT> __sink;
__format::_Sink_iter<_CharT> __out(__sink);
__format::__write_escaped(__out, __s, __term);
basic_string_view<_CharT> __escaped(__sink.view().data(),
__sink.view().size());
const size_t __escaped_width = _S_trunc(__escaped, __prec);
// N.B. [tab:format.type.string] defines '?' as
// "Copies the escaped string ([format.string.escaped]) to the output",
// so precision seems to apply to the escaped string.
return __format::__write_padded_as_spec(__escaped, __escaped_width,
__fc, _M_spec);
}
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void
set_debug_format() noexcept
{ _M_spec._M_type = _Pres_esc; }
#endif
private:
// Apply the precision __prec to __s and return the estimated display
// width of the result.  basic_string_view::npos means "no precision".
// __s is taken by reference; the non-Unicode branch shortens it with
// substr, and __unicode::__truncate is handed the same reference
// (presumably to shorten it as well -- confirm against its definition).
static size_t
_S_trunc(basic_string_view<_CharT>& __s, size_t __prec)
{
  constexpr size_t __no_prec = basic_string_view<_CharT>::npos;

  if constexpr (!__unicode::__literal_encoding_is_unicode<_CharT>())
    {
      // One code unit is counted as one column here; with __no_prec
      // substr keeps the whole string.
      __s = __s.substr(0, __prec);
      return __s.size();
    }
  else if (__prec == __no_prec)
    return __unicode::__field_width(__s);
  else
    return __unicode::__truncate(__s, __prec);
}
_Spec<_CharT> _M_spec{};
};
@ -1120,7 +1455,7 @@ namespace __format
++__first;
}
break;
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
case '?':
if (__type == _AsChar)
{
@ -1272,7 +1607,7 @@ namespace __format
_S_character_width(_CharT __c)
{
// N.B. a single byte cannot encode a character of width greater than 1
if constexpr (sizeof(_CharT) > 1u &&
if constexpr (sizeof(_CharT) > 1u &&
__unicode::__literal_encoding_is_unicode<_CharT>())
return __unicode::__field_width(__c);
else
@ -1286,7 +1621,34 @@ namespace __format
{
return __format::__write_padded_as_spec({&__c, 1u},
_S_character_width(__c),
__fc, _M_spec);
__fc, _M_spec);
}
// Format the single character __c in debug (escaped) form, i.e. between
// apostrophes, honouring the width/fill/alignment stored in _M_spec.
//
// The escaped form always contains at least three characters (two
// apostrophes and one character), so a requested width of 3 or less never
// needs padding and the output is written directly.  Otherwise the escaped
// form is first written to a small stack buffer so that its width can be
// computed before padding is applied.
template<typename _Out>
  typename basic_format_context<_Out, _CharT>::iterator
  _M_format_character_escaped(_CharT __c,
                              basic_format_context<_Out, _CharT>& __fc) const
  {
    using _Esc = _Escapes<_CharT>;
    constexpr auto __term = __format::_Term_char::_Tc_apos;
    const basic_string_view<_CharT> __in(&__c, 1u);
    if (_M_spec._M_get_width(__fc) <= 3u)
      return __format::__write_escaped(__fc.out(), __in, __term);

    // The longest escaped form of one code unit is '\x{ffffffff}'
    // (an ill-formed 32-bit value): 2 apostrophes + "\x{" + 8 hex digits
    // + "}" = 14 characters.  The previous size of 12 could not hold it
    // and ended up in _Fixedbuf_sink::_M_overflow.  Two spare elements are
    // kept in case the sink reports overflow as soon as the buffer becomes
    // full rather than when it is exceeded.
    _CharT __buf[16];
    __format::_Fixedbuf_sink<_CharT> __sink(__buf);
    __format::_Sink_iter<_CharT> __out(__sink);
    __format::__write_escaped(__out, __in, __term);

    const basic_string_view<_CharT> __escaped = __sink.view();
    size_t __estimated_width;
    if (__escaped[1] == _Esc::_S_bslash()[0]) // escape sequence
      // Taken as one column per character of the escape sequence.
      __estimated_width = __escaped.size();
    else
      // Two apostrophes plus the display width of the character itself.
      __estimated_width = 2 + _S_character_width(__c);
    return __format::__write_padded_as_spec(__escaped,
                                            __estimated_width,
                                            __fc, _M_spec);
  }
template<typename _Int>
@ -1973,15 +2335,12 @@ namespace __format
|| _M_f._M_spec._M_type == __format::_Pres_c)
return _M_f._M_format_character(__u, __fc);
else if (_M_f._M_spec._M_type == __format::_Pres_esc)
{
// TODO
return __fc.out();
}
return _M_f._M_format_character_escaped(__u, __fc);
else
return _M_f.format(static_cast<make_unsigned_t<_CharT>>(__u), __fc);
}
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void
set_debug_format() noexcept
{ _M_f._M_spec._M_type = __format::_Pres_esc; }
@ -2012,15 +2371,12 @@ namespace __format
|| _M_f._M_spec._M_type == __format::_Pres_c)
return _M_f._M_format_character(__u, __fc);
else if (_M_f._M_spec._M_type == __format::_Pres_esc)
{
// TODO
return __fc.out();
}
return _M_f._M_format_character_escaped(__u, __fc);
else
return _M_f.format(static_cast<unsigned char>(__u), __fc);
}
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void
set_debug_format() noexcept
{ _M_f._M_spec._M_type = __format::_Pres_esc; }
@ -2050,7 +2406,7 @@ namespace __format
format(_CharT* __u, basic_format_context<_Out, _CharT>& __fc) const
{ return _M_f.format(__u, __fc); }
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void set_debug_format() noexcept { _M_f.set_debug_format(); }
#endif
@ -2075,7 +2431,7 @@ namespace __format
basic_format_context<_Out, _CharT>& __fc) const
{ return _M_f.format(__u, __fc); }
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void set_debug_format() noexcept { _M_f.set_debug_format(); }
#endif
@ -2099,7 +2455,7 @@ namespace __format
basic_format_context<_Out, _CharT>& __fc) const
{ return _M_f.format({__u, _Nm}, __fc); }
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void set_debug_format() noexcept { _M_f.set_debug_format(); }
#endif
@ -2123,7 +2479,7 @@ namespace __format
basic_format_context<_Out, char>& __fc) const
{ return _M_f.format(__u, __fc); }
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void set_debug_format() noexcept { _M_f.set_debug_format(); }
#endif
@ -2148,7 +2504,7 @@ namespace __format
basic_format_context<_Out, wchar_t>& __fc) const
{ return _M_f.format(__u, __fc); }
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void set_debug_format() noexcept { _M_f.set_debug_format(); }
#endif
@ -2173,7 +2529,7 @@ namespace __format
basic_format_context<_Out, char>& __fc) const
{ return _M_f.format(__u, __fc); }
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void set_debug_format() noexcept { _M_f.set_debug_format(); }
#endif
@ -2198,7 +2554,7 @@ namespace __format
basic_format_context<_Out, wchar_t>& __fc) const
{ return _M_f.format(__u, __fc); }
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges // C++ >= 23 && HOSTED
constexpr void set_debug_format() noexcept { _M_f.set_debug_format(); }
#endif
@ -2859,6 +3215,32 @@ namespace __format
{ return _Sink_iter<_CharT>(*this); }
};
// A sink that writes into a caller-provided span and never flushes
// anywhere else.  The caller is expected to supply a buffer large enough
// for everything written to it; view() exposes what has been written.
template<typename _CharT>
  class _Fixedbuf_sink final : public _Sink<_CharT>
  {
    // Running out of space is treated as a logic error: assert, and if
    // assertions are disabled call _M_rewind() on the base sink.
    void
    _M_overflow() override
    {
      __glibcxx_assert(false);
      this->_M_rewind();
    }

  public:
    [[__gnu__::__always_inline__]]
    constexpr explicit
    _Fixedbuf_sink(span<_CharT> __buf)
    : _Sink<_CharT>(__buf)
    { }

    // View of the range reported by _M_used().
    constexpr basic_string_view<_CharT>
    view() const
    {
      const auto __written = this->_M_used();
      return basic_string_view<_CharT>(__written.data(), __written.size());
    }
  };
// A sink with an internal buffer. This is used to implement concrete sinks.
template<typename _CharT>
class _Buf_sink : public _Sink<_CharT>
@ -2993,13 +3375,6 @@ namespace __format
}
};
template<typename _CharT, typename _Alloc = allocator<_CharT>>
using _Str_sink
= _Seq_sink<basic_string<_CharT, char_traits<_CharT>, _Alloc>>;
// template<typename _CharT, typename _Alloc = allocator<_CharT>>
// using _Vec_sink = _Seq_sink<vector<_CharT, _Alloc>>;
// A sink that writes to an output iterator.
// Writes to a fixed-size buffer and then flushes to the output iterator
// when the buffer fills up.
@ -3675,17 +4050,17 @@ namespace __format
return _M_visit([&__vis]<typename _Tp>(_Tp& __val) -> decltype(auto)
{
constexpr bool __user_facing = __is_one_of<_Tp,
monostate, bool, _CharT,
int, unsigned int, long long int, unsigned long long int,
float, double, long double,
const _CharT*, basic_string_view<_CharT>,
const void*, handle>::value;
monostate, bool, _CharT,
int, unsigned int, long long int, unsigned long long int,
float, double, long double,
const _CharT*, basic_string_view<_CharT>,
const void*, handle>::value;
if constexpr (__user_facing)
return std::forward<_Visitor>(__vis)(__val);
else
{
handle __h(__val);
return std::forward<_Visitor>(__vis)(__h);
handle __h(__val);
return std::forward<_Visitor>(__vis)(__h);
}
}, __type);
}
@ -4781,6 +5156,7 @@ namespace __format
: __format::__range_default_formatter<format_kind<_Rg>, _Rg, _CharT>
{ };
#endif // C++23 formatting ranges
#undef _GLIBCXX_WIDEN
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std

View file

@ -3,7 +3,6 @@
#include <format>
#include <vector>
#include <chrono> // For _Widen
#include <testsuite_hooks.h>
static_assert(!std::formattable<std::vector<bool>::reference, int>);
@ -21,7 +20,7 @@ is_format_string_for(const char* str, Args&&... args)
}
}
#define WIDEN_(C, S) ::std::chrono::__detail::_Widen<C>(S, L##S)
#define WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S)
#define WIDEN(S) WIDEN_(_CharT, S)
void

View file

@ -0,0 +1,454 @@
// { dg-options "-fexec-charset=UTF-8 -fwide-exec-charset=UTF-32LE -DUNICODE_ENC" }
// { dg-do run { target c++23 } }
// { dg-add-options no_pch }
#include <format>
#include <testsuite_hooks.h>
// Helpers returning the debug ("{:?}") representation of their argument.
// One overload per character type and per string-view type, so that the
// templated tests can call fdebug() uniformly for char and wchar_t.

// Debug format of a narrow character.
std::string
fdebug(char t)
{ return std::format("{:?}", t); }
// Debug format of a wide character.
std::wstring
fdebug(wchar_t t)
{ return std::format(L"{:?}", t); }
// Debug format of a narrow string.
std::string
fdebug(std::string_view t)
{ return std::format("{:?}", t); }
// Debug format of a wide string.
std::wstring
fdebug(std::wstring_view t)
{ return std::format(L"{:?}", t); }
#define WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S)
#define WIDEN(S) WIDEN_(_CharT, S)
// Checks the characters that have a dedicated escape sequence
// (\t, \n, \r, \\) both as strings and as single characters, and the two
// quote characters: '"' is escaped only inside a string, '\'' is escaped
// only inside a character.
template<typename _CharT>
void
test_basic_escapes()
{
std::basic_string<_CharT> res;
const auto tab = WIDEN("\t");
res = fdebug(tab);
VERIFY( res == WIDEN(R"("\t")") );
res = fdebug(tab[0]);
VERIFY( res == WIDEN(R"('\t')") );
const auto nline = WIDEN("\n");
res = fdebug(nline);
VERIFY( res == WIDEN(R"("\n")") );
res = fdebug(nline[0]);
VERIFY( res == WIDEN(R"('\n')") );
const auto carret = WIDEN("\r");
res = fdebug(carret);
VERIFY( res == WIDEN(R"("\r")") );
res = fdebug(carret[0]);
VERIFY( res == WIDEN(R"('\r')") );
const auto bslash = WIDEN("\\");
res = fdebug(bslash);
VERIFY( res == WIDEN(R"("\\")") );
res = fdebug(bslash[0]);
VERIFY( res == WIDEN(R"('\\')") );
// A double quote needs escaping in a string but not in a character.
const auto quote = WIDEN("\"");
res = fdebug(quote);
VERIFY( res == WIDEN(R"("\"")") );
res = fdebug(quote[0]);
VERIFY( res == WIDEN(R"('"')") );
// An apostrophe needs escaping in a character but not in a string.
const auto apos = WIDEN("\'");
res = fdebug(apos);
VERIFY( res == WIDEN(R"("'")") );
res = fdebug(apos[0]);
VERIFY( res == WIDEN(R"('\'')") );
}
// Checks ASCII control characters without a dedicated escape sequence
// (0x10 and 0x7f): they are printed as \u{hex}, while the space, letters
// and digits around them are copied unchanged.
template<typename _CharT>
void
test_ascii_escapes()
{
std::basic_string<_CharT> res;
const auto in = WIDEN("\x10 abcde\x7f\t0123");
res = fdebug(in);
VERIFY( res == WIDEN(R"("\u{10} abcde\u{7f}\t0123")") );
res = fdebug(in[0]);
VERIFY( res == WIDEN(R"('\u{10}')") );
res = fdebug(in[1]);
VERIFY( res == WIDEN(R"(' ')") );
res = fdebug(in[2]);
VERIFY( res == WIDEN(R"('a')") );
}
// Checks that printable characters outside the ASCII range are copied
// unescaped.  The string is always tested; individual characters are only
// tested when in[i] is a whole character: with UNICODE_ENC that requires
// sizeof(_CharT) >= 2, in the other (ISO8859-1) configuration it always
// holds.
template<typename _CharT>
void
test_extended_ascii()
{
std::basic_string<_CharT> res;
const auto in = WIDEN("Åëÿ");
res = fdebug(in);
VERIFY( res == WIDEN(R"("Åëÿ")") );
static constexpr bool __test_characters
#if UNICODE_ENC
= sizeof(_CharT) >= 2;
#else // ISO8859-1
= true;
#endif // UNICODE_ENC
if constexpr (__test_characters)
{
res = fdebug(in[0]);
VERIFY( res == WIDEN(R"('Å')") );
res = fdebug(in[1]);
VERIFY( res == WIDEN(R"('ë')") );
res = fdebug(in[2]);
VERIFY( res == WIDEN(R"('ÿ')") );
}
}
#if UNICODE_ENC
// Checks which non-ASCII code points are escaped under a Unicode literal
// encoding.  Per the expectations below, the control, format, space
// separator and paragraph separator code points are printed as \u{hex},
// while the modifier letter and the symbol are copied unchanged.
// Single characters are tested only where one _CharT can hold the code
// point (>= 2 bytes for the BMP ones, >= 4 bytes for U+1F984).
template<typename _CharT>
void
test_unicode_escapes()
{
std::basic_string<_CharT> res;
const auto in = WIDEN(
"\u008a" // Cc, Control, Line Tabulation Set,
"\u00ad" // Cf, Format, Soft Hyphen
"\u1d3d" // Lm, Modifier letter, Modifier Letter Capital Ou
"\u00a0" // Zs, Space Separator, No-Break Space (NBSP)
"\u2029" // Zp, Paragraph Separator, Paragraph Separator
"\U0001f984" // So, Other Symbol, Unicorn Face
);
const auto out = WIDEN("\""
R"(\u{8a})"
R"(\u{ad})"
"\u1d3d"
R"(\u{a0})"
R"(\u{2029})"
"\U0001f984"
"\"");
res = fdebug(in);
VERIFY( res == out );
if constexpr (sizeof(_CharT) >= 2)
{
res = fdebug(in[0]);
VERIFY( res == WIDEN(R"('\u{8a}')") );
res = fdebug(in[1]);
VERIFY( res == WIDEN(R"('\u{ad}')") );
res = fdebug(in[2]);
VERIFY( res == WIDEN("'\u1d3d'") );
res = fdebug(in[3]);
VERIFY( res == WIDEN(R"('\u{a0}')") );
res = fdebug(in[4]);
VERIFY( res == WIDEN(R"('\u{2029}')") );
}
if constexpr (sizeof(_CharT) >= 4)
{
res = fdebug(in[5]);
VERIFY( res == WIDEN("'\U0001f984'") );
}
}
// Checks combining marks (U+0302, U+0323): they are copied unchanged when
// they follow an unescaped character, and escaped as \u{hex} when they
// follow an escaped character, start the string, or are formatted as a
// lone character.
template<typename _CharT>
void
test_grapheme_extend()
{
std::basic_string<_CharT> res;
const auto vin = WIDEN("o\u0302\u0323");
res = fdebug(vin);
VERIFY( res == WIDEN("\"o\u0302\u0323\"") );
std::basic_string_view<_CharT> in = WIDEN("\t\u0302\u0323");
res = fdebug(in);
VERIFY( res == WIDEN(R"("\t\u{302}\u{323}")") );
res = fdebug(in.substr(1));
VERIFY( res == WIDEN(R"("\u{302}\u{323}")") );
if constexpr (sizeof(_CharT) >= 2)
{
res = fdebug(in[1]);
VERIFY( res == WIDEN(R"('\u{302}')") );
}
}
// Checks that a well-formed U+FFFD REPLACEMENT CHARACTER present in the
// input is copied unchanged (once and twice in a row) rather than being
// escaped.
template<typename _CharT>
void
test_replacement_char()
{
std::basic_string<_CharT> repl = WIDEN("\uFFFD");
std::basic_string<_CharT> res = fdebug(repl);
VERIFY( res == WIDEN("\"\uFFFD\"") );
repl = WIDEN("\uFFFD\uFFFD");
res = fdebug(repl);
VERIFY( res == WIDEN("\"\uFFFD\uFFFD\"") );
}
// Checks ill-formed UTF-8: the complete four-byte sequence is printed as
// the code point itself, while truncated sequences and isolated bytes are
// printed one code unit at a time as \x{hex}, both for strings and for
// single characters.
void
test_ill_formed_utf8_seq()
{
std::string_view seq = "\xf0\x9f\xa6\x84"; // \U0001F984
std::string res;
res = fdebug(seq);
VERIFY( res == "\"\U0001F984\"" );
res = fdebug(seq.substr(1));
VERIFY( res == R"("\x{9f}\x{a6}\x{84}")" );
res = fdebug(seq.substr(2));
VERIFY( res == R"("\x{a6}\x{84}")" );
res = fdebug(seq[0]);
VERIFY( res == R"('\x{f0}')" );
res = fdebug(seq.substr(0, 1));
VERIFY( res == R"("\x{f0}")" );
res = fdebug(seq[1]);
VERIFY( res == R"('\x{9f}')" );
res = fdebug(seq.substr(1, 1));
VERIFY( res == R"("\x{9f}")" );
res = fdebug(seq[2]);
VERIFY( res == R"('\x{a6}')" );
res = fdebug(seq.substr(2, 1));
VERIFY( res == R"("\x{a6}")" );
res = fdebug(seq[3]);
VERIFY( res == R"('\x{84}')" );
res = fdebug(seq.substr(3, 1));
VERIFY( res == R"("\x{84}")" );
}
// Checks wide code units that are not valid code points (0xffffff and
// 0xffffffff): they are printed as \x{hex}, both as single characters
// and as one-element strings.
void
test_ill_formed_utf32()
{
std::wstring res;
wchar_t ic1 = static_cast<wchar_t>(0xff'ffff);
res = fdebug(ic1);
VERIFY( res == LR"('\x{ffffff}')" );
std::wstring is1(1, ic1);
res = fdebug(is1);
VERIFY( res == LR"("\x{ffffff}")" );
wchar_t ic2 = static_cast<wchar_t>(0xffff'ffff);
res = fdebug(ic2);
VERIFY( res == LR"('\x{ffffffff}')" );
std::wstring is2(1, ic2);
res = fdebug(is2);
VERIFY( res == LR"("\x{ffffffff}")" );
}
#endif // UNICODE_ENC
// Checks that width, fill and alignment are applied to the escaped
// representation (delimiters and escape sequences included) for both
// strings and single characters.  Every expected result is exactly 10
// columns wide.
template<typename _CharT>
void
test_fill()
{
  std::basic_string<_CharT> res;

  std::basic_string_view<_CharT> in = WIDEN("a\t\x10\u00ad");

  // Default alignment is to the left: 3 characters + 7 spaces of padding.
  res = std::format(WIDEN("{:10?}"), in.substr(0, 1));
  VERIFY( res == WIDEN(R"("a"       )") );
  VERIFY( res.size() == 10 );

  res = std::format(WIDEN("{:->10?}"), in.substr(1, 1));
  VERIFY( res == WIDEN(R"(------"\t")") );

  res = std::format(WIDEN("{:+<10?}"), in.substr(2, 1));
  VERIFY( res == WIDEN(R"("\u{10}"++)") );

  // Same as above, for single characters: 3 characters + 7 spaces.
  res = std::format(WIDEN("{:10?}"), in[0]);
  VERIFY( res == WIDEN(R"('a'       )") );
  VERIFY( res.size() == 10 );

  res = std::format(WIDEN("{:->10?}"), in[1]);
  VERIFY( res == WIDEN(R"(------'\t')") );

  res = std::format(WIDEN("{:+<10?}"), in[2]);
  VERIFY( res == WIDEN(R"('\u{10}'++)") );

#if UNICODE_ENC
  res = std::format(WIDEN("{:=^10?}"), in.substr(3));
  VERIFY( res == WIDEN(R"(="\u{ad}"=)") );

  // width is 2
  std::basic_string_view<_CharT> in2 = WIDEN("\u1100");
  res = std::format(WIDEN("{:*^10?}"), in2);
  VERIFY( res == WIDEN("***\"\u1100\"***") );

  if constexpr (sizeof(_CharT) >= 2)
    {
      res = std::format(WIDEN("{:=^10?}"), in[3]);
      VERIFY( res == WIDEN(R"(='\u{ad}'=)") );

      res = std::format(WIDEN("{:*^10?}"), in2[0]);
      VERIFY( res == WIDEN("***'\u1100'***") );
    }
#endif // UNICODE_ENC
}
// Checks that precision truncates the escaped representation, so the
// cut may fall inside an escape sequence or drop the closing quote.
template<typename _CharT>
void
test_prec()
{
std::basic_string<_CharT> res;
// With '?' the escaped presentation is copied to the output, so the
// precision applies to the escaped text rather than to the source string.
std::basic_string_view<_CharT> in = WIDEN("a\t\x10\u00ad");
res = std::format(WIDEN("{:.2?}"), in.substr(0, 1));
VERIFY( res == WIDEN(R"("a)") );
res = std::format(WIDEN("{:.4?}"), in.substr(1, 1));
VERIFY( res == WIDEN(R"("\t")") );
res = std::format(WIDEN("{:.5?}"), in.substr(2, 1));
VERIFY( res == WIDEN(R"("\u{1)") );
#if UNICODE_ENC
res = std::format(WIDEN("{:.10?}"), in.substr(3));
VERIFY( res == WIDEN(R"("\u{ad}")") );
std::basic_string_view<_CharT> in2 = WIDEN("\u1100");
res = std::format(WIDEN("{:.3?}"), in2);
VERIFY( res == WIDEN("\"\u1100") );
#endif // UNICODE_ENC
}
// Checks debug formatting of narrow char arguments with a wide format
// string, including escaping and padding.
void test_char_as_wchar()
{
std::wstring res;
res = std::format(L"{:?}", 'a');
VERIFY( res == LR"('a')" );
res = std::format(L"{:?}", '\t');
VERIFY( res == LR"('\t')" );
res = std::format(L"{:+<10?}", '\x10');
VERIFY( res == LR"('\u{10}'++)" );
}
// Thin wrapper around a value of type T.  Its std::formatter
// specialization formats the wrapped value through the formatter for T
// after calling set_debug_format() on it.
template<typename T>
struct DebugWrapper
{
T val;
};
// Formatter for DebugWrapper<T> used to exercise set_debug_format().
template<typename T, typename CharT>
struct std::formatter<DebugWrapper<T>, CharT>
{
// Parses the format spec with the formatter for T, then switches that
// formatter to debug format via set_debug_format().
constexpr std::basic_format_parse_context<CharT>::iterator
parse(std::basic_format_parse_context<CharT>& pc)
{
auto out = under.parse(pc);
under.set_debug_format();
return out;
}
// Formats the wrapped value with the underlying formatter.
template<typename Out>
Out format(DebugWrapper<T> const& t,
std::basic_format_context<Out, CharT>& fc) const
{ return under.format(t.val, fc); }
private:
// Formatter for the wrapped type, to which all work is delegated.
std::formatter<T, CharT> under;
};
// Checks set_debug_format() on the formatter for the string-like type
// StrT: a DebugWrapper<StrT> built from the buffer "abc" must format as
// the quoted string "abc".
template<typename _CharT, typename StrT>
void
test_formatter_str()
{
_CharT buf[]{ 'a', 'b', 'c', 0 };
DebugWrapper<StrT> in{ buf };
std::basic_string<_CharT> res = std::format(WIDEN("{:?}"), in );
VERIFY( res == WIDEN(R"("abc")") );
}
// Checks set_debug_format() on the formatter for character arrays: every
// element of the array is formatted, so a trailing null character is
// printed as an escape sequence.
template<typename _CharT>
void
test_formatter_arr()
{
std::basic_string<_CharT> res;
DebugWrapper<_CharT[3]> in3{ 'a', 'b', 'c' };
res = std::format(WIDEN("{:?}"), in3 );
VERIFY( res == WIDEN(R"("abc")") );
// We print all characters, including null-terminator
DebugWrapper<_CharT[4]> in4{ 'a', 'b', 'c', 0 };
res = std::format(WIDEN("{:?}"), in4 );
VERIFY( res == WIDEN(R"("abc\u{0}")") );
}
// Checks set_debug_format() on the formatter for the character type SrcT
// when formatting into a string of _CharT: 'a' must be printed between
// apostrophes.
template<typename _CharT, typename SrcT>
void
test_formatter_char()
{
DebugWrapper<SrcT> in{ 'a' };
std::basic_string<_CharT> res = std::format(WIDEN("{:?}"), in);
VERIFY( res == WIDEN(R"('a')") );
}
// Runs the set_debug_format() tests for CharT: the character formatter,
// the pointer, basic_string and basic_string_view formatters, and the
// array formatter.
template<typename CharT>
void
test_formatters()
{
test_formatter_char<CharT, CharT>();
test_formatter_str<CharT, CharT*>();
test_formatter_str<CharT, const CharT*>();
test_formatter_str<CharT, std::basic_string<CharT>>();
test_formatter_str<CharT, std::basic_string_view<CharT>>();
test_formatter_arr<CharT>();
}
// Runs the formatter tests for char and wchar_t, plus the mixed case of
// a char argument formatted into a wide string.
void
test_formatters_c()
{
test_formatters<char>();
test_formatters<wchar_t>();
test_formatter_char<wchar_t, char>();
}
// Test driver: runs every test for both char and wchar_t.  The tests that
// depend on a Unicode literal encoding are only built when UNICODE_ENC is
// set.
int main()
{
test_basic_escapes<char>();
test_basic_escapes<wchar_t>();
test_ascii_escapes<char>();
test_ascii_escapes<wchar_t>();
test_extended_ascii<char>();
test_extended_ascii<wchar_t>();
#if UNICODE_ENC
test_unicode_escapes<char>();
test_unicode_escapes<wchar_t>();
test_grapheme_extend<char>();
test_grapheme_extend<wchar_t>();
test_replacement_char<char>();
test_replacement_char<wchar_t>();
test_ill_formed_utf8_seq();
test_ill_formed_utf32();
#endif // UNICODE_ENC
test_fill<char>();
test_fill<wchar_t>();
test_prec<char>();
test_prec<wchar_t>();
test_formatters_c();
}

View file

@ -0,0 +1,5 @@
// { dg-options "-fexec-charset=ISO8859-1 -fwide-exec-charset=UTF-32LE" }
// { dg-do run { target c++23 } }
// { dg-add-options no_pch }
#include "debug.cc"

View file

@ -108,7 +108,7 @@ is_std_format_spec_for(std::string_view spec)
}
}
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges
constexpr bool escaped_strings_supported = true;
#else
constexpr bool escaped_strings_supported = false;

View file

@ -62,7 +62,7 @@ test_indexing()
VERIFY( ! is_format_string_for("{} {0}", 1) );
}
#if __cpp_lib_format_ranges
#if __glibcxx_format_ranges
constexpr bool escaped_strings_supported = true;
#else
constexpr bool escaped_strings_supported = false;