ucnid-2011-1.c: New test.

gcc/testsuite:
	* c-c++-common/cpp/ucnid-2011-1.c: New test.

libcpp:
	* ucnid.tab: Add C11 and C11NOSTART data.
	* makeucnid.c (digit): Rename enum value to N99.
	(C11, N11, all_languages): New enum values.
	(NUM_CODE_POINTS, MAX_CODE_POINT): New macros.
	(flags, decomp, combining_value): Use NUM_CODE_POINTS as array
	size.
	(decomp): Use unsigned int as element type.
	(all_decomp): New array.
	(read_ucnid): Handle C11 and C11NOSTART.  Use MAX_CODE_POINT.
	(read_table): Use MAX_CODE_POINT.  Store all decompositions in
	all_decomp.
	(read_derived): Use MAX_CODE_POINT.
	(write_table): Use NUM_CODE_POINTS.  Print N99, C11 and N11
	flags.  Print whole array variable declaration rather than just
	array contents.
	(char_id_valid, write_context_switch): New functions.
	(main): Call write_context_switch.
	* ucnid.h: Regenerate.
	* include/cpplib.h (struct cpp_options): Add c11_identifiers.
	* init.c (struct lang_flags): Add c11_identifiers.
	(cpp_set_lang): Set c11_identifiers option from selected language.
	* internal.h (struct normalize_state): Document "previous" as
	previous starter character.
	(NORMALIZE_STATE_UPDATE_IDNUM): Take character as argument.
	* charset.c (DIG): Rename enum value to N99.
	(C11, N11): New enum values.
	(struct ucnrange): Give name to struct.  Use short for flags and
	unsigned int for end of range.  Include ucnid.h for whole variable
	declaration.
	(ucn_valid_in_identifier): Allow for characters up to 0x10FFFF.
	Allow for C11 in determining valid characters and valid start
	characters.  Use check_nfc for non-Hangul context-dependent
	checks.  Only store starter characters in nst->previous.
	(_cpp_valid_ucn): Pass new argument to
	NORMALIZE_STATE_UPDATE_IDNUM.
	* lex.c (lex_identifier): Pass new argument to
	NORMALIZE_STATE_UPDATE_IDNUM.  Call NORMALIZE_STATE_UPDATE_IDNUM
	after initial non-UCN part of identifier.
	(lex_number): Pass new argument to NORMALIZE_STATE_UPDATE_IDNUM.

From-SVN: r204886
This commit is contained in:
Joseph Myers 2013-11-16 00:05:08 +00:00 committed by Joseph Myers
parent 3d053a5f72
commit d3f4ff8b51
11 changed files with 4783 additions and 840 deletions

View file

@ -828,29 +828,32 @@ enum {
/* Valid in a C99 identifier? */
C99 = 1,
/* Valid in a C99 identifier, but not as the first character? */
DIG = 2,
N99 = 2,
/* Valid in a C++ identifier? */
CXX = 4,
/* Valid in a C11/C++11 identifier? */
C11 = 8,
/* Valid in a C11/C++11 identifier, but not as the first character? */
N11 = 16,
/* NFC representation is not valid in an identifier? */
CID = 8,
CID = 32,
/* Might be valid NFC form? */
NFC = 16,
NFC = 64,
/* Might be valid NFKC form? */
NKC = 32,
NKC = 128,
/* Certain preceding characters might make it not valid NFC/NFKC form? */
CTX = 64
CTX = 256
};
static const struct {
struct ucnrange {
/* Bitmap of flags above. */
unsigned char flags;
unsigned short flags;
/* Combining class of the character. */
unsigned char combine;
/* Last character in the range described by this entry. */
unsigned short end;
} ucnranges[] = {
#include "ucnid.h"
unsigned int end;
};
#include "ucnid.h"
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
the start of an identifier, and 0 if C is not valid in an
@ -864,8 +867,9 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
struct normalize_state *nst)
{
int mn, mx, md;
unsigned short valid_flags, invalid_start_flags;
if (c > 0xFFFF)
if (c > 0x10FFFF)
return 0;
mn = 0;
@ -881,15 +885,25 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
/* When -pedantic, we require the character to have been listed by
the standard for the current language. Otherwise, we accept the
union of the acceptable sets for C++98 and C99. */
if (! (ucnranges[mn].flags & (C99 | CXX)))
union of the acceptable sets for all supported language versions. */
valid_flags = C99 | CXX | C11;
if (CPP_PEDANTIC (pfile))
{
if (CPP_OPTION (pfile, c11_identifiers))
valid_flags = C11;
else if (CPP_OPTION (pfile, c99))
valid_flags = C99;
else if (CPP_OPTION (pfile, cplusplus))
valid_flags = CXX;
}
if (! (ucnranges[mn].flags & valid_flags))
return 0;
if (CPP_PEDANTIC (pfile)
&& ((CPP_OPTION (pfile, c99) && !(ucnranges[mn].flags & C99))
|| (CPP_OPTION (pfile, cplusplus)
&& !(ucnranges[mn].flags & CXX))))
return 0;
if (CPP_OPTION (pfile, c11_identifiers))
invalid_start_flags = N11;
else if (CPP_OPTION (pfile, c99))
invalid_start_flags = N99;
else
invalid_start_flags = 0;
/* Update NST. */
if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class)
@ -899,17 +913,6 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
bool safe;
cppchar_t p = nst->previous;
/* Easy cases from Bengali, Oriya, Tamil, Kannada, and Malayalam. */
if (c == 0x09BE)
safe = p != 0x09C7; /* Use 09CB instead of 09C7 09BE. */
else if (c == 0x0B3E)
safe = p != 0x0B47; /* Use 0B4B instead of 0B47 0B3E. */
else if (c == 0x0BBE)
safe = p != 0x0BC6 && p != 0x0BC7; /* Use 0BCA/0BCB instead. */
else if (c == 0x0CC2)
safe = p != 0x0CC6; /* Use 0CCA instead of 0CC6 0CC2. */
else if (c == 0x0D3E)
safe = p != 0x0D46 && p != 0x0D47; /* Use 0D4A/0D4B instead. */
/* For Hangul, characters in the range AC00-D7A3 are NFC/NFKC,
and are combined algorithmically from a sequence of the form
1100-1112 1161-1175 11A8-11C2
@ -917,20 +920,19 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
really a valid character).
Unfortunately, C99 allows (only) the NFC form, but C++ allows
only the combining characters. */
else if (c >= 0x1161 && c <= 0x1175)
if (c >= 0x1161 && c <= 0x1175)
safe = p < 0x1100 || p > 0x1112;
else if (c >= 0x11A8 && c <= 0x11C2)
safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0);
else
safe = check_nfc (pfile, c, p);
if (!safe)
{
/* Uh-oh, someone updated ucnid.h without updating this code. */
cpp_error (pfile, CPP_DL_ICE, "Character %x might not be NFKC", c);
safe = true;
if ((c >= 0x1161 && c <= 0x1175) || (c >= 0x11A8 && c <= 0x11C2))
nst->level = MAX (nst->level, normalized_identifier_C);
else
nst->level = normalized_none;
}
if (!safe && c < 0x1161)
nst->level = normalized_none;
else if (!safe)
nst->level = MAX (nst->level, normalized_identifier_C);
}
else if (ucnranges[mn].flags & NKC)
;
@ -940,11 +942,13 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
nst->level = MAX (nst->level, normalized_identifier_C);
else
nst->level = normalized_none;
nst->previous = c;
if (ucnranges[mn].combine == 0)
nst->previous = c;
nst->prev_class = ucnranges[mn].combine;
/* In C99, UCN digits may not begin identifiers. */
if (CPP_OPTION (pfile, c99) && (ucnranges[mn].flags & DIG))
/* In C99, UCN digits may not begin identifiers. In C11 and C++11,
UCN combining characters may not begin identifiers. */
if (ucnranges[mn].flags & invalid_start_flags)
return 2;
return 1;
@ -1054,7 +1058,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
CPP_OPTION (pfile, warn_dollars) = 0;
cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
}
NORMALIZE_STATE_UPDATE_IDNUM (nst);
NORMALIZE_STATE_UPDATE_IDNUM (nst, result);
}
else if (identifier_pos)
{