gcc/libstdc++-v3/testsuite/std/text_encoding/cons.cc

// { dg-do run { target c++26 } }
// { dg-require-cpp-feature-test "__cpp_lib_text_encoding" }

#include <text_encoding>
#include <string_view>
#include <testsuite_hooks.h>

using namespace std::string_view_literals;

// A default-constructed text_encoding is expected to report id::unknown,
// an empty (but non-null) name, and an empty alias list.
constexpr void
test_default_construct()
{
  const std::text_encoding dflt;
  VERIFY( dflt.mib() == std::text_encoding::id::unknown );
  VERIFY( *dflt.name() == '\0' ); // P2862R1 name() should never return null
  VERIFY( dflt.aliases().empty() );
}

// Check construction from a name.  Names that are not recognised (including
// the empty name) are expected to give id::other with no aliases, while
// recognised names give the matching id and a non-empty alias list.  In
// every case name() must compare equal to the string given to the
// constructor.
constexpr void
test_construct_by_name()
{
  using id = std::text_encoding::id;

  // Default-constructed (empty) string_view.
  std::string_view empty_name;
  const std::text_encoding from_empty(empty_name);
  VERIFY( from_empty.mib() == id::other );
  VERIFY( from_empty.name() == empty_name );
  VERIFY( from_empty.aliases().empty() );

  const std::string_view bogus_name = "not a real encoding";
  const std::text_encoding from_bogus(bogus_name);
  VERIFY( from_bogus.mib() == id::other );
  VERIFY( from_bogus.name() == bogus_name );
  VERIFY( from_bogus.aliases().empty() );

  // Both objects have id::other; they are expected to compare unequal to
  // each other, yet equal to the bare id.
  VERIFY( from_bogus != from_empty );
  VERIFY( from_bogus == from_empty.mib() );

  const std::string_view utf8_name = "utf8";
  const std::text_encoding from_utf8(utf8_name);
  VERIFY( from_utf8.mib() == id::UTF8 );
  VERIFY( from_utf8.name() == utf8_name );
  VERIFY( ! from_utf8.aliases().empty() );
  VERIFY( from_utf8.aliases().front() == "UTF-8"sv );

  const std::string_view latin1_name = "Latin-1"; // matches "latin1"
  const std::text_encoding from_latin1(latin1_name);
  VERIFY( from_latin1.mib() == id::ISOLatin1 );
  VERIFY( from_latin1.name() == latin1_name );
  VERIFY( ! from_latin1.aliases().empty() );
  VERIFY( from_latin1.aliases().front() == "ISO_8859-1:1987"sv ); // primary name

  const std::string_view us_name = "U.S."; // matches "us"
  const std::text_encoding from_us(us_name);
  VERIFY( from_us.mib() == id::ASCII );
  VERIFY( from_us.name() == us_name );
  VERIFY( ! from_us.aliases().empty() );
  VERIFY( from_us.aliases().front() == "US-ASCII"sv ); // primary name

  const std::string_view ascii_name = "ascii";
  const std::text_encoding from_ascii(ascii_name);
  VERIFY( from_ascii.mib() == id::ASCII );
  VERIFY( from_ascii.name() == ascii_name );
}

// Check construction from an id.  id::other and id::unknown are expected to
// give an empty name and no aliases; id::UTF8 is expected to give the name
// "UTF-8", which is also the first alias, and an alias list that contains
// "csUTF8".
constexpr void
test_construct_by_id()
{
  using id = std::text_encoding::id;

  const std::text_encoding other_enc(id::other);
  VERIFY( other_enc.mib() == id::other );
  VERIFY( other_enc.name() == ""sv );
  VERIFY( other_enc.aliases().empty() );

  const std::text_encoding unknown_enc(id::unknown);
  VERIFY( unknown_enc.mib() == id::unknown );
  VERIFY( unknown_enc.name() == ""sv );
  VERIFY( unknown_enc.aliases().empty() );

  const std::text_encoding utf8_enc(id::UTF8);
  VERIFY( utf8_enc.mib() == id::UTF8 );
  VERIFY( utf8_enc.name() == "UTF-8"sv );
  VERIFY( ! utf8_enc.aliases().empty() );
  VERIFY( utf8_enc.aliases().front() == std::string_view{utf8_enc.name()} );

  // Scan the aliases, stopping at the first one equal to "csUTF8".
  bool has_cs_alias = false;
  for (auto candidate : utf8_enc.aliases())
    {
      if (candidate != "csUTF8"sv)
	continue;
      has_cs_alias = true;
      break;
    }
  VERIFY( has_cs_alias );
}

// Copies made by copy-initialization, by `auto` deduction, and by copy
// assignment must each compare equal to the object they were copied from.
constexpr void
test_copy_construct()
{
  const std::text_encoding dflt;
  std::text_encoding target = dflt;
  VERIFY( target == dflt );

  const std::text_encoding utf8(std::text_encoding::id::UTF8);
  auto utf8_copy = utf8;
  VERIFY( utf8_copy == utf8 );

  // Overwrite the copy of the default-constructed object by assignment.
  target = utf8_copy;
  VERIFY( target == utf8 );
}

// Run every constructor test twice: once as an ordinary run-time call and
// once during constant evaluation via static_assert.
int main()
{
  auto run_all = []() constexpr -> bool
  {
    test_default_construct();
    test_construct_by_name();
    test_construct_by_id();
    test_copy_construct();
    return true;
  };

  run_all();			// run-time execution
  static_assert( run_all() );	// compile-time execution
}
libstdc++: Implement C++26 std::text_encoding (P1885R12) [PR113318] This is another C++26 change, approved in Varna 2023. We require a new static array of data that is extracted from the IANA Character Sets database. A new Python script to generate a header from the IANA CSV file is added. The text_encoding class is basically just a pointer to an {ID,name} pair in the static array. The aliases view is also just the same pointer (or empty), and the view's iterator moves forwards and backwards in the array while the array elements have the same ID (or to one element further, for a past-the-end iterator). Because those iterators refer to a global array that never goes out of scope, there's no reason they should ever produce undefined behaviour or indeterminate values. They should either have well-defined behaviour, or abort. The overhead of ensuring those properties is pretty low, so seems worth it. This means that an aliases_view iterator should never be able to access out-of-bounds. A non-value-initialized iterator always points to an element of the static array even when not dereferenceable (the array has unreachable entries at the start and end, which means that even a past-the-end iterator for the last encoding in the array still points to valid memory). Dereferencing an iterator can always return a valid array element, or "" for a non-dereferenceable iterator (but doing so will abort when assertions are enabled). In the language being proposed for C++26, dereferencing an invalid iterator erroneously returns "". Attempting to increment/decrement past the last/first element in the view is erroneously a no-op, so aborts when assertions are enabled, and doesn't change value otherwise. Similarly, constructing a std::text_encoding with an invalid id (one that doesn't have the value of an enumerator) erroneously behaves the same as constructing with id::unknown, or aborts with assertions enabled. 
libstdc++-v3/ChangeLog: PR libstdc++/113318 * acinclude.m4 (GLIBCXX_CONFIGURE): Add c++26 directory. (GLIBCXX_CHECK_TEXT_ENCODING): Define. * config.h.in: Regenerate. * configure: Regenerate. * configure.ac: Use GLIBCXX_CHECK_TEXT_ENCODING. * include/Makefile.am: Add new headers. * include/Makefile.in: Regenerate. * include/bits/locale_classes.h (locale::encoding): Declare new member function. * include/bits/unicode.h (__charset_alias_match): New function. * include/bits/text_encoding-data.h: New file. * include/bits/version.def (text_encoding): Define. * include/bits/version.h: Regenerate. * include/std/text_encoding: New file. * src/Makefile.am: Add new subdirectory. * src/Makefile.in: Regenerate. * src/c++26/Makefile.am: New file. * src/c++26/Makefile.in: New file. * src/c++26/text_encoding.cc: New file. * src/experimental/Makefile.am: Include c++26 convenience library. * src/experimental/Makefile.in: Regenerate. * python/libstdcxx/v6/printers.py (StdTextEncodingPrinter): New printer. * scripts/gen_text_encoding_data.py: New file. * testsuite/22_locale/locale/encoding.cc: New test. * testsuite/ext/unicode/charset_alias_match.cc: New test. * testsuite/std/text_encoding/cons.cc: New test. * testsuite/std/text_encoding/members.cc: New test. * testsuite/std/text_encoding/requirements.cc: New test. Reviewed-by: Ulrich Drepper <drepper.fsp@gmail.com> Reviewed-by: Patrick Palka <ppalka@redhat.com> 2024-01-15 15:42:50 +00:00			`// { dg-do run { target c++26 } }`
libstdc++: Add dg-require-cpp-feature-test to test feature test macros This adds a new dejagnu directive which can be used to make a test depend on a feature test macro such as __cpp_lib_text_encoding. This is more flexible than writing a new dg-require-xxx for each feature. libstdc++-v3/ChangeLog: * testsuite/lib/dg-options.exp (dg-require-cpp-feature-test): New proc. * testsuite/lib/libstdc++.exp (check_v3_target_cpp_feature_test): New proc. * testsuite/std/text_encoding/cons.cc: Use new directive to skip the test if the __cpp_lib_text_encoding feature test macro is not defined. * testsuite/std/text_encoding/requirements.cc: Likewise. 2024-03-22 22:01:50 +00:00			`// { dg-require-cpp-feature-test "__cpp_lib_text_encoding" }`
libstdc++: Implement C++26 std::text_encoding (P1885R12) [PR113318] This is another C++26 change, approved in Varna 2023. We require a new static array of data that is extracted from the IANA Character Sets database. A new Python script to generate a header from the IANA CSV file is added. The text_encoding class is basically just a pointer to an {ID,name} pair in the static array. The aliases view is also just the same pointer (or empty), and the view's iterator moves forwards and backwards in the array while the array elements have the same ID (or to one element further, for a past-the-end iterator). Because those iterators refer to a global array that never goes out of scope, there's no reason they should ever produce undefined behaviour or indeterminate values. They should either have well-defined behaviour, or abort. The overhead of ensuring those properties is pretty low, so seems worth it. This means that an aliases_view iterator should never be able to access out-of-bounds. A non-value-initialized iterator always points to an element of the static array even when not dereferenceable (the array has unreachable entries at the start and end, which means that even a past-the-end iterator for the last encoding in the array still points to valid memory). Dereferencing an iterator can always return a valid array element, or "" for a non-dereferenceable iterator (but doing so will abort when assertions are enabled). In the language being proposed for C++26, dereferencing an invalid iterator erroneously returns "". Attempting to increment/decrement past the last/first element in the view is erroneously a no-op, so aborts when assertions are enabled, and doesn't change value otherwise. Similarly, constructing a std::text_encoding with an invalid id (one that doesn't have the value of an enumerator) erroneously behaves the same as constructing with id::unknown, or aborts with assertions enabled. 
libstdc++-v3/ChangeLog: PR libstdc++/113318 * acinclude.m4 (GLIBCXX_CONFIGURE): Add c++26 directory. (GLIBCXX_CHECK_TEXT_ENCODING): Define. * config.h.in: Regenerate. * configure: Regenerate. * configure.ac: Use GLIBCXX_CHECK_TEXT_ENCODING. * include/Makefile.am: Add new headers. * include/Makefile.in: Regenerate. * include/bits/locale_classes.h (locale::encoding): Declare new member function. * include/bits/unicode.h (__charset_alias_match): New function. * include/bits/text_encoding-data.h: New file. * include/bits/version.def (text_encoding): Define. * include/bits/version.h: Regenerate. * include/std/text_encoding: New file. * src/Makefile.am: Add new subdirectory. * src/Makefile.in: Regenerate. * src/c++26/Makefile.am: New file. * src/c++26/Makefile.in: New file. * src/c++26/text_encoding.cc: New file. * src/experimental/Makefile.am: Include c++26 convenience library. * src/experimental/Makefile.in: Regenerate. * python/libstdcxx/v6/printers.py (StdTextEncodingPrinter): New printer. * scripts/gen_text_encoding_data.py: New file. * testsuite/22_locale/locale/encoding.cc: New test. * testsuite/ext/unicode/charset_alias_match.cc: New test. * testsuite/std/text_encoding/cons.cc: New test. * testsuite/std/text_encoding/members.cc: New test. * testsuite/std/text_encoding/requirements.cc: New test. Reviewed-by: Ulrich Drepper <drepper.fsp@gmail.com> Reviewed-by: Patrick Palka <ppalka@redhat.com> 2024-01-15 15:42:50 +00:00
			`#include <text_encoding>`
			`#include <string_view>`
			`#include <testsuite_hooks.h>`

			`using namespace std::string_view_literals;`

			`constexpr void`
			`test_default_construct()`
			`{`
			`std::text_encoding e0;`
			`VERIFY( e0.mib() == std::text_encoding::unknown );`
			`VERIFY( e0.name()[0] == '\0' ); // P2862R1 name() should never return null`
			`VERIFY( e0.aliases().empty() );`
			`}`

			`constexpr void`
			`test_construct_by_name()`
			`{`
			`std::string_view s;`
			`std::text_encoding e0(s);`
			`VERIFY( e0.mib() == std::text_encoding::other );`
			`VERIFY( e0.name() == s );`
			`VERIFY( e0.aliases().empty() );`

			`s = "not a real encoding";`
			`std::text_encoding e1(s);`
			`VERIFY( e1.mib() == std::text_encoding::other );`
			`VERIFY( e1.name() == s );`
			`VERIFY( e1.aliases().empty() );`

			`VERIFY( e1 != e0 );`
			`VERIFY( e1 == e0.mib() );`

			`s = "utf8";`
			`std::text_encoding e2(s);`
			`VERIFY( e2.mib() == std::text_encoding::UTF8 );`
			`VERIFY( e2.name() == s );`
			`VERIFY( ! e2.aliases().empty() );`
			`VERIFY( e2.aliases().front() == "UTF-8"sv );`

			`s = "Latin-1"; // matches "latin1"`
			`std::text_encoding e3(s);`
			`VERIFY( e3.mib() == std::text_encoding::ISOLatin1 );`
			`VERIFY( e3.name() == s );`
			`VERIFY( ! e3.aliases().empty() );`
			`VERIFY( e3.aliases().front() == "ISO_8859-1:1987"sv ); // primary name`

			`s = "U.S."; // matches "us"`
			`std::text_encoding e4(s);`
			`VERIFY( e4.mib() == std::text_encoding::ASCII );`
			`VERIFY( e4.name() == s );`
			`VERIFY( ! e4.aliases().empty() );`
			`VERIFY( e4.aliases().front() == "US-ASCII"sv ); // primary name`
libstdc++: Add "ASCII" as an alias for std::text_encoding::id::ASCII As noted in LWG 4043, "ASCII" is not an alias for any known registered character encoding, so std::text_encoding("ASCII").mib() == id::other. Add the alias "ASCII" to the implementation-defined superset of aliases for that encoding. libstdc++-v3/ChangeLog: * include/bits/text_encoding-data.h: Regenerate. * scripts/gen_text_encoding_data.py: Add extra_aliases dict containing "ASCII". * testsuite/std/text_encoding/cons.cc: Check "ascii" is known. Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com> Signed-off-by: Ewan Higgs <ewan.higgs@gmail.com> 2024-01-23 14:57:15 +00:00
			`s = "ascii";`
			`std::text_encoding e5(s);`
			`VERIFY( e5.mib() == std::text_encoding::ASCII );`
			`VERIFY( e5.name() == s );`
libstdc++: Implement C++26 std::text_encoding (P1885R12) [PR113318] This is another C++26 change, approved in Varna 2023. We require a new static array of data that is extracted from the IANA Character Sets database. A new Python script to generate a header from the IANA CSV file is added. The text_encoding class is basically just a pointer to an {ID,name} pair in the static array. The aliases view is also just the same pointer (or empty), and the view's iterator moves forwards and backwards in the array while the array elements have the same ID (or to one element further, for a past-the-end iterator). Because those iterators refer to a global array that never goes out of scope, there's no reason they should ever produce undefined behaviour or indeterminate values. They should either have well-defined behaviour, or abort. The overhead of ensuring those properties is pretty low, so seems worth it. This means that an aliases_view iterator should never be able to access out-of-bounds. A non-value-initialized iterator always points to an element of the static array even when not dereferenceable (the array has unreachable entries at the start and end, which means that even a past-the-end iterator for the last encoding in the array still points to valid memory). Dereferencing an iterator can always return a valid array element, or "" for a non-dereferenceable iterator (but doing so will abort when assertions are enabled). In the language being proposed for C++26, dereferencing an invalid iterator erroneously returns "". Attempting to increment/decrement past the last/first element in the view is erroneously a no-op, so aborts when assertions are enabled, and doesn't change value otherwise. Similarly, constructing a std::text_encoding with an invalid id (one that doesn't have the value of an enumerator) erroneously behaves the same as constructing with id::unknown, or aborts with assertions enabled. 
libstdc++-v3/ChangeLog: PR libstdc++/113318 * acinclude.m4 (GLIBCXX_CONFIGURE): Add c++26 directory. (GLIBCXX_CHECK_TEXT_ENCODING): Define. * config.h.in: Regenerate. * configure: Regenerate. * configure.ac: Use GLIBCXX_CHECK_TEXT_ENCODING. * include/Makefile.am: Add new headers. * include/Makefile.in: Regenerate. * include/bits/locale_classes.h (locale::encoding): Declare new member function. * include/bits/unicode.h (__charset_alias_match): New function. * include/bits/text_encoding-data.h: New file. * include/bits/version.def (text_encoding): Define. * include/bits/version.h: Regenerate. * include/std/text_encoding: New file. * src/Makefile.am: Add new subdirectory. * src/Makefile.in: Regenerate. * src/c++26/Makefile.am: New file. * src/c++26/Makefile.in: New file. * src/c++26/text_encoding.cc: New file. * src/experimental/Makefile.am: Include c++26 convenience library. * src/experimental/Makefile.in: Regenerate. * python/libstdcxx/v6/printers.py (StdTextEncodingPrinter): New printer. * scripts/gen_text_encoding_data.py: New file. * testsuite/22_locale/locale/encoding.cc: New test. * testsuite/ext/unicode/charset_alias_match.cc: New test. * testsuite/std/text_encoding/cons.cc: New test. * testsuite/std/text_encoding/members.cc: New test. * testsuite/std/text_encoding/requirements.cc: New test. Reviewed-by: Ulrich Drepper <drepper.fsp@gmail.com> Reviewed-by: Patrick Palka <ppalka@redhat.com> 2024-01-15 15:42:50 +00:00			`}`

			`constexpr void`
			`test_construct_by_id()`
			`{`
			`std::text_encoding e0(std::text_encoding::other);`
			`VERIFY( e0.mib() == std::text_encoding::other );`
			`VERIFY( e0.name() == ""sv );`
			`VERIFY( e0.aliases().empty() );`

			`std::text_encoding e1(std::text_encoding::unknown);`
			`VERIFY( e1.mib() == std::text_encoding::unknown );`
			`VERIFY( e1.name() == ""sv );`
			`VERIFY( e1.aliases().empty() );`

			`std::text_encoding e2(std::text_encoding::UTF8);`
			`VERIFY( e2.mib() == std::text_encoding::UTF8 );`
			`VERIFY( e2.name() == "UTF-8"sv );`
			`VERIFY( ! e2.aliases().empty() );`
			`VERIFY( e2.aliases().front() == std::string_view(e2.name()) );`
			`bool found = false;`
			`for (auto alias : e2.aliases())`
			`if (alias == "csUTF8"sv)`
			`{`
			`found = true;`
			`break;`
			`}`
			`VERIFY( found );`
			`}`

			`constexpr void`
			`test_copy_construct()`
			`{`
			`std::text_encoding e0;`
			`std::text_encoding e1 = e0;`
			`VERIFY( e1 == e0 );`

			`std::text_encoding e2(std::text_encoding::UTF8);`
			`auto e3 = e2;`
			`VERIFY( e3 == e2 );`

			`e1 = e3;`
			`VERIFY( e1 == e2 );`
			`}`

			`int main()`
			`{`
			`auto run_tests = [] {`
			`test_default_construct();`
			`test_construct_by_name();`
			`test_construct_by_id();`
			`test_copy_construct();`
			`return true;`
			`};`

			`run_tests();`
			`static_assert( run_tests() );`
			`}`