libstdc++: Add "ASCII" as an alias for std::text_encoding::id::ASCII
As noted in LWG 4043, "ASCII" is not an alias for any known registered character encoding, so std::text_encoding("ASCII").mib() == id::other. Add the alias "ASCII" to the implementation-defined superset of aliases for that encoding.

libstdc++-v3/ChangeLog:

* include/bits/text_encoding-data.h: Regenerate.
* scripts/gen_text_encoding_data.py: Add extra_aliases dict containing "ASCII".
* testsuite/std/text_encoding/cons.cc: Check "ascii" is known.

Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
Signed-off-by: Ewan Higgs <ewan.higgs@gmail.com>
This commit is contained in:
parent
00b2d7d17c
commit
358fd42aab
3 changed files with 30 additions and 2 deletions
|
@ -14,6 +14,7 @@
|
|||
{ 3, "IBM367" },
|
||||
{ 3, "cp367" },
|
||||
{ 3, "csASCII" },
|
||||
{ 3, "ASCII" }, // libstdc++ extension
|
||||
{ 4, "ISO_8859-1:1987" },
|
||||
{ 4, "iso-ir-100" },
|
||||
{ 4, "ISO_8859-1" },
|
||||
|
@ -417,7 +418,7 @@
|
|||
{ 104, "csISO2022CN" },
|
||||
{ 105, "ISO-2022-CN-EXT" },
|
||||
{ 105, "csISO2022CNEXT" },
|
||||
#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET 413
|
||||
#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET 414
|
||||
{ 106, "UTF-8" },
|
||||
{ 106, "csUTF8" },
|
||||
{ 109, "ISO-8859-13" },
|
||||
|
|
|
@ -36,6 +36,18 @@ print("#ifndef _GLIBCXX_GET_ENCODING_DATA")
|
|||
print('# error "This is not a public header, do not include it directly"')
|
||||
print("#endif\n")
|
||||
|
||||
# We need to generate a list of initializers of the form { mib, alias }, e.g.,
|
||||
# { 3, "US-ASCII" },
|
||||
# { 3, "ISO646-US" },
|
||||
# { 3, "csASCII" },
|
||||
# { 4, "ISO_8859-1:1987" },
|
||||
# { 4, "latin1" },
|
||||
# The initializers must be sorted by the mib value. The first entry for
|
||||
# a given mib must be the primary name for the encoding. Any aliases for
|
||||
# the encoding come after the primary name.
|
||||
# We also define a macro _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET which is the
|
||||
# offset into the list of the mib=106, alias="UTF-8" entry. This is used
|
||||
# to optimize the common case, so we don't need to search for "UTF-8".
|
||||
|
||||
charsets = {}
|
||||
with open(sys.argv[1], newline='') as f:
|
||||
|
@ -52,10 +64,15 @@ with open(sys.argv[1], newline='') as f:
|
|||
aliases.remove(name)
|
||||
charsets[mib] = [name] + aliases
|
||||
|
||||
# Remove "NATS-DANO" and "NATS-DANO-ADD"
|
||||
# Remove "NATS-DANO" and "NATS-DANO-ADD" as specified by the C++ standard.
|
||||
charsets.pop(33, None)
|
||||
charsets.pop(34, None)
|
||||
|
||||
# This is not an official IANA alias, but we include it in the
|
||||
# implementation-defined superset of aliases for US-ASCII.
|
||||
# See also LWG 4043.
|
||||
extra_aliases = {3: ["ASCII"]}
|
||||
|
||||
count = 0
|
||||
for mib in sorted(charsets.keys()):
|
||||
names = charsets[mib]
|
||||
|
@ -64,6 +81,11 @@ for mib in sorted(charsets.keys()):
|
|||
for name in names:
|
||||
print(' {{ {:4}, "{}" }},'.format(mib, name))
|
||||
count += len(names)
|
||||
if mib in extra_aliases:
|
||||
names = extra_aliases[mib]
|
||||
for name in names:
|
||||
print(' {{ {:4}, "{}" }}, // libstdc++ extension'.format(mib, name))
|
||||
count += len(names)
|
||||
|
||||
# <text_encoding> gives an error if this macro is left defined.
|
||||
# Do this last, so that the generated output is not usable unless we reach here.
|
||||
|
|
|
@ -53,6 +53,11 @@ test_construct_by_name()
|
|||
VERIFY( e4.name() == s );
|
||||
VERIFY( ! e4.aliases().empty() );
|
||||
VERIFY( e4.aliases().front() == "US-ASCII"sv ); // primary name
|
||||
|
||||
s = "ascii";
|
||||
std::text_encoding e5(s);
|
||||
VERIFY( e5.mib() == std::text_encoding::ASCII );
|
||||
VERIFY( e5.name() == s );
|
||||
}
|
||||
|
||||
constexpr void
|
||||
|
|
Loading…
Add table
Reference in a new issue