From 9e322bc1a5bea706d6f48c15ccba1fc9a8841012 Mon Sep 17 00:00:00 2001
From: Joseph Myers
Date: Sun, 3 May 2009 12:59:26 +0100
Subject: [PATCH] charset.c (one_utf8_to_cppchar): Correct mask used for 5-byte UTF-8 sequences.

libcpp:
	* charset.c (one_utf8_to_cppchar): Correct mask used for 5-byte
	UTF-8 sequences.

gcc/testsuite:
	* gcc.dg/cpp/utf8-5byte-1.c: New test.

From-SVN: r147073
---
 gcc/testsuite/ChangeLog | 4 ++++
 gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c | 17 +++++++++++++++++
 libcpp/ChangeLog | 5 +++++
 libcpp/charset.c | 2 +-
 4 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index da4f0e2fa73..bdb6f441926 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2009-05-03 Joseph Myers
+
+	* gcc.dg/cpp/utf8-5byte-1.c: New test.
+
 2009-05-02 Joseph Myers
 
 	* gcc.dg/ucnid-6.c: Fix typo in dg-do directive.
diff --git a/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c
new file mode 100644
index 00000000000..7f96a56841c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/utf8-5byte-1.c
@@ -0,0 +1,17 @@
+/* Test for bug in conversions from 5-byte UTF-8 sequences in
+   cpplib. */
+/* { dg-do run { target { 4byte_wchar_t } } } */
+/* { dg-options "-std=gnu99" } */
+
+extern void abort (void);
+extern void exit (int);
+
+__WCHAR_TYPE__ ws[] = L"û¿¿¿¿";
+
+int
+main (void)
+{
+  if (ws[0] != L'\U03FFFFFF' || ws[1] != 0)
+    abort ();
+  exit (0);
+}
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 24f3f8debcf..a541b69211c 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,8 @@
+2009-05-03 Joseph Myers
+
+	* charset.c (one_utf8_to_cppchar): Correct mask used for 5-byte
+	UTF-8 sequences.
+
 2009-04-25 Joseph Myers
 
 	PR preprocessor/39559
diff --git a/libcpp/charset.c b/libcpp/charset.c
index e743b1e277f..f1da4265ddd 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -169,7 +169,7 @@ static inline int
 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
 		     cppchar_t *cp)
 {
-  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
   static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 
   cppchar_t c;