71 lines
2.3 KiB
Python
71 lines
2.3 KiB
Python
![]() |
#!/usr/bin/env python3
|
||
|
#
|
||
|
# Script to generate tables for libstdc++ std::text_encoding.
|
||
|
#
|
||
|
# This file is part of GCC.
|
||
|
#
|
||
|
# GCC is free software; you can redistribute it and/or modify it under
|
||
|
# the terms of the GNU General Public License as published by the Free
|
||
|
# Software Foundation; either version 3, or (at your option) any later
|
||
|
# version.
|
||
|
#
|
||
|
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
# for more details.
|
||
|
#
|
||
|
# You should have received a copy of the GNU General Public License
|
||
|
# along with GCC; see the file COPYING3. If not see
|
||
|
# <http://www.gnu.org/licenses/>.
|
||
|
|
||
|
# To update the libstdc++ static data in <bits/text_encoding-data.h> download
|
||
|
# the latest:
|
||
|
# https://www.iana.org/assignments/character-sets/character-sets-1.csv
|
||
|
# Then run this script and save the output to
|
||
|
# include/bits/text_encoding-data.h
|
||
|
|
||
|
import sys
|
||
|
import csv
|
||
|
|
||
|
# Exactly one command-line argument is expected: the path to the character
# sets CSV file.  Anything else prints a usage message and exits with status 1.
if len(sys.argv) != 2:
    print("Usage: %s <character sets csv>" % sys.argv[0], file=sys.stderr)
    raise SystemExit(1)

# Preamble of the generated output: a "do not edit" banner followed by a
# guard that makes the output fail to compile unless
# _GLIBCXX_GET_ENCODING_DATA is defined by whoever includes it.
for preamble_line in (
    "// Generated by gen_text_encoding_data.py, do not edit.\n",
    "#ifndef _GLIBCXX_GET_ENCODING_DATA",
    '# error "This is not a public header, do not include it directly"',
    "#endif\n",
):
    print(preamble_line)


# Map each mibEnum value (CSV column 2) to a list of names: the primary name
# (column 1) first, then the whitespace-separated aliases (column 5).
charsets = {}
with open(sys.argv[1], newline='') as csv_file:
    records = csv.reader(csv_file)
    next(records)  # discard the header row
    for record in records:
        mib = int(record[2])
        if mib in charsets:
            raise ValueError("Multiple rows for mibEnum={}".format(mib))
        primary = record[1]
        others = record[5].split()
        # The primary name must be listed first, so drop its (first)
        # occurrence from the aliases if it is repeated there.
        try:
            others.remove(primary)
        except ValueError:
            pass
        charsets[mib] = [primary, *others]

# Drop mibEnum 33 and 34 ("NATS-DANO" and "NATS-DANO-ADD"), if present.
for excluded_mib in (33, 34):
    charsets.pop(excluded_mib, None)

# Emit one "{ mib, name }" initializer per name, in ascending mibEnum order.
# `count` is the number of initializers printed so far; its value just before
# the UTF-8 entries is published as _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET.
count = 0
entry_format = ' {{ {:4}, "{}" }},'
for mib, names in sorted(charsets.items()):
    if names[0] == "UTF-8":
        print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
    for alias in names:
        print(entry_format.format(mib, alias))
    count += len(names)

# <text_encoding> gives an error if this macro is left defined.
# This is printed last so that output from a run that failed part-way
# through is not usable.
print("\n#undef _GLIBCXX_GET_ENCODING_DATA")
|