ucnid-2011-1.c: New test.
gcc/testsuite: * c-c++-common/cpp/ucnid-2011-1.c: New test. libcpp: * ucnid.tab: Add C11 and C11NOSTART data. * makeucnid.c (digit): Rename enum value to N99. (C11, N11, all_languages): New enum values. (NUM_CODE_POINTS, MAX_CODE_POINT): New macros. (flags, decomp, combining_value): Use NUM_CODE_POINTS as array size. (decomp): Use unsigned int as element type. (all_decomp): New array. (read_ucnid): Handle C11 and C11NOSTART. Use MAX_CODE_POINT. (read_table): Use MAX_CODE_POINT. Store all decompositions in all_decomp. (read_derived): Use MAX_CODE_POINT. (write_table): Use NUM_CODE_POINTS. Print N99, C11 and N11 flags. Print whole array variable declaration rather than just array contents. (char_id_valid, write_context_switch): New functions. (main): Call write_context_switch. * ucnid.h: Regenerate. * include/cpplib.h (struct cpp_options): Add c11_identifiers. * init.c (struct lang_flags): Add c11_identifiers. (cpp_set_lang): Set c11_identifiers option from selected language. * internal.h (struct normalize_state): Document "previous" as previous starter character. (NORMALIZE_STATE_UPDATE_IDNUM): Take character as argument. * charset.c (DIG): Rename enum value to N99. (C11, N11): New enum values. (struct ucnrange): Give name to struct. Use short for flags and unsigned int for end of range. Include ucnid.h for whole variable declaration. (ucn_valid_in_identifier): Allow for characters up to 0x10FFFF. Allow for C11 in determining valid characters and valid start characters. Use check_nfc for non-Hangul context-dependent checks. Only store starter characters in nst->previous. (_cpp_valid_ucn): Pass new argument to NORMALIZE_STATE_UPDATE_IDNUM. * lex.c (lex_identifier): Pass new argument to NORMALIZE_STATE_UPDATE_IDNUM. Call NORMALIZE_STATE_UPDATE_IDNUM after initial non-UCN part of identifier. (lex_number): Pass new argument to NORMALIZE_STATE_UPDATE_IDNUM. From-SVN: r204886
This commit is contained in:
parent
3d053a5f72
commit
d3f4ff8b51
11 changed files with 4783 additions and 840 deletions
|
@ -828,29 +828,32 @@ enum {
|
|||
/* Valid in a C99 identifier? */
|
||||
C99 = 1,
|
||||
/* Valid in a C99 identifier, but not as the first character? */
|
||||
DIG = 2,
|
||||
N99 = 2,
|
||||
/* Valid in a C++ identifier? */
|
||||
CXX = 4,
|
||||
/* Valid in a C11/C++11 identifier? */
|
||||
C11 = 8,
|
||||
/* Valid in a C11/C++11 identifier, but not as the first character? */
|
||||
N11 = 16,
|
||||
/* NFC representation is not valid in an identifier? */
|
||||
CID = 8,
|
||||
CID = 32,
|
||||
/* Might be valid NFC form? */
|
||||
NFC = 16,
|
||||
NFC = 64,
|
||||
/* Might be valid NFKC form? */
|
||||
NKC = 32,
|
||||
NKC = 128,
|
||||
/* Certain preceding characters might make it not valid NFC/NFKC form? */
|
||||
CTX = 64
|
||||
CTX = 256
|
||||
};
|
||||
|
||||
static const struct {
|
||||
struct ucnrange {
|
||||
/* Bitmap of flags above. */
|
||||
unsigned char flags;
|
||||
unsigned short flags;
|
||||
/* Combining class of the character. */
|
||||
unsigned char combine;
|
||||
/* Last character in the range described by this entry. */
|
||||
unsigned short end;
|
||||
} ucnranges[] = {
|
||||
#include "ucnid.h"
|
||||
unsigned int end;
|
||||
};
|
||||
#include "ucnid.h"
|
||||
|
||||
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
|
||||
the start of an identifier, and 0 if C is not valid in an
|
||||
|
@ -864,8 +867,9 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||
struct normalize_state *nst)
|
||||
{
|
||||
int mn, mx, md;
|
||||
unsigned short valid_flags, invalid_start_flags;
|
||||
|
||||
if (c > 0xFFFF)
|
||||
if (c > 0x10FFFF)
|
||||
return 0;
|
||||
|
||||
mn = 0;
|
||||
|
@ -881,15 +885,25 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||
|
||||
/* When -pedantic, we require the character to have been listed by
|
||||
the standard for the current language. Otherwise, we accept the
|
||||
union of the acceptable sets for C++98 and C99. */
|
||||
if (! (ucnranges[mn].flags & (C99 | CXX)))
|
||||
union of the acceptable sets for all supported language versions. */
|
||||
valid_flags = C99 | CXX | C11;
|
||||
if (CPP_PEDANTIC (pfile))
|
||||
{
|
||||
if (CPP_OPTION (pfile, c11_identifiers))
|
||||
valid_flags = C11;
|
||||
else if (CPP_OPTION (pfile, c99))
|
||||
valid_flags = C99;
|
||||
else if (CPP_OPTION (pfile, cplusplus))
|
||||
valid_flags = CXX;
|
||||
}
|
||||
if (! (ucnranges[mn].flags & valid_flags))
|
||||
return 0;
|
||||
|
||||
if (CPP_PEDANTIC (pfile)
|
||||
&& ((CPP_OPTION (pfile, c99) && !(ucnranges[mn].flags & C99))
|
||||
|| (CPP_OPTION (pfile, cplusplus)
|
||||
&& !(ucnranges[mn].flags & CXX))))
|
||||
return 0;
|
||||
if (CPP_OPTION (pfile, c11_identifiers))
|
||||
invalid_start_flags = N11;
|
||||
else if (CPP_OPTION (pfile, c99))
|
||||
invalid_start_flags = N99;
|
||||
else
|
||||
invalid_start_flags = 0;
|
||||
|
||||
/* Update NST. */
|
||||
if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class)
|
||||
|
@ -899,17 +913,6 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||
bool safe;
|
||||
cppchar_t p = nst->previous;
|
||||
|
||||
/* Easy cases from Bengali, Oriya, Tamil, Kannada, and Malayalam. */
|
||||
if (c == 0x09BE)
|
||||
safe = p != 0x09C7; /* Use 09CB instead of 09C7 09BE. */
|
||||
else if (c == 0x0B3E)
|
||||
safe = p != 0x0B47; /* Use 0B4B instead of 0B47 0B3E. */
|
||||
else if (c == 0x0BBE)
|
||||
safe = p != 0x0BC6 && p != 0x0BC7; /* Use 0BCA/0BCB instead. */
|
||||
else if (c == 0x0CC2)
|
||||
safe = p != 0x0CC6; /* Use 0CCA instead of 0CC6 0CC2. */
|
||||
else if (c == 0x0D3E)
|
||||
safe = p != 0x0D46 && p != 0x0D47; /* Use 0D4A/0D4B instead. */
|
||||
/* For Hangul, characters in the range AC00-D7A3 are NFC/NFKC,
|
||||
and are combined algorithmically from a sequence of the form
|
||||
1100-1112 1161-1175 11A8-11C2
|
||||
|
@ -917,20 +920,19 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||
really a valid character).
|
||||
Unfortunately, C99 allows (only) the NFC form, but C++ allows
|
||||
only the combining characters. */
|
||||
else if (c >= 0x1161 && c <= 0x1175)
|
||||
if (c >= 0x1161 && c <= 0x1175)
|
||||
safe = p < 0x1100 || p > 0x1112;
|
||||
else if (c >= 0x11A8 && c <= 0x11C2)
|
||||
safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0);
|
||||
else
|
||||
safe = check_nfc (pfile, c, p);
|
||||
if (!safe)
|
||||
{
|
||||
/* Uh-oh, someone updated ucnid.h without updating this code. */
|
||||
cpp_error (pfile, CPP_DL_ICE, "Character %x might not be NFKC", c);
|
||||
safe = true;
|
||||
if ((c >= 0x1161 && c <= 0x1175) || (c >= 0x11A8 && c <= 0x11C2))
|
||||
nst->level = MAX (nst->level, normalized_identifier_C);
|
||||
else
|
||||
nst->level = normalized_none;
|
||||
}
|
||||
if (!safe && c < 0x1161)
|
||||
nst->level = normalized_none;
|
||||
else if (!safe)
|
||||
nst->level = MAX (nst->level, normalized_identifier_C);
|
||||
}
|
||||
else if (ucnranges[mn].flags & NKC)
|
||||
;
|
||||
|
@ -940,11 +942,13 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||
nst->level = MAX (nst->level, normalized_identifier_C);
|
||||
else
|
||||
nst->level = normalized_none;
|
||||
nst->previous = c;
|
||||
if (ucnranges[mn].combine == 0)
|
||||
nst->previous = c;
|
||||
nst->prev_class = ucnranges[mn].combine;
|
||||
|
||||
/* In C99, UCN digits may not begin identifiers. */
|
||||
if (CPP_OPTION (pfile, c99) && (ucnranges[mn].flags & DIG))
|
||||
/* In C99, UCN digits may not begin identifiers. In C11 and C++11,
|
||||
UCN combining characters may not begin identifiers. */
|
||||
if (ucnranges[mn].flags & invalid_start_flags)
|
||||
return 2;
|
||||
|
||||
return 1;
|
||||
|
@ -1054,7 +1058,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
|||
CPP_OPTION (pfile, warn_dollars) = 0;
|
||||
cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
|
||||
}
|
||||
NORMALIZE_STATE_UPDATE_IDNUM (nst);
|
||||
NORMALIZE_STATE_UPDATE_IDNUM (nst, result);
|
||||
}
|
||||
else if (identifier_pos)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue