Reject invalid 5-byte sequences when detecting UTF-8 encoding
* src/coding.c (detect_coding_utf_8): Reject multibyte sequences
whose leading byte is greater than MAX_MULTIBYTE_LEADING_CODE.
(Bug#31829)
* src/character.h (MAX_MULTIBYTE_LEADING_CODE): Add commentary
about the connection between the value of this macro and MAX_CHAR.
This commit is contained in:
parent
0d3c35807d
commit
22aa665c9b
2 changed files with 6 additions and 2 deletions
|
@ -57,7 +57,8 @@ INLINE_HEADER_BEGIN
|
|||
|
||||
/* Minimum leading code of multibyte characters. */
|
||||
#define MIN_MULTIBYTE_LEADING_CODE 0xC0
|
||||
/* Maximum leading code of multibyte characters. */
|
||||
/* Maximum leading code of multibyte characters. Note: this must be
|
||||
updated if we ever increase MAX_CHAR above. */
|
||||
#define MAX_MULTIBYTE_LEADING_CODE 0xF8
|
||||
|
||||
/* Unicode character values. */
|
||||
|
|
|
@ -1225,7 +1225,10 @@ detect_coding_utf_8 (struct coding_system *coding,
|
|||
ONE_MORE_BYTE (c4);
|
||||
if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
|
||||
break;
|
||||
if (UTF_8_5_OCTET_LEADING_P (c))
|
||||
if (UTF_8_5_OCTET_LEADING_P (c)
|
||||
/* If we ever need to increase MAX_CHAR, the below may need
|
||||
to be reviewed. */
|
||||
&& c < MAX_MULTIBYTE_LEADING_CODE)
|
||||
{
|
||||
nchars++;
|
||||
continue;
|
||||
|
|
Loading…
Add table
Reference in a new issue