'[:graph:]' now excludes whitespace, not just ' '
* doc/lispref/searching.texi (Char Classes):
* lisp/emacs-lisp/rx.el (rx): Document [:graph:] to be [:print:] sans whitespace (not sans space).
* src/character.c (graphicp): Exclude all Unicode whitespace chars, not just space.
* src/regex.c (ISGRAPH): Exclude U+00A0 (NO-BREAK SPACE).
This commit is contained in:
parent
5161c9ca6a
commit
3074a9fad1
4 changed files with 21 additions and 14 deletions
|
@@ -558,7 +558,7 @@ This matches any @acronym{ASCII} control character.
|
|||
This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
|
||||
matches any digit, as well as @samp{+} and @samp{-}.
|
||||
@item [:graph:]
|
||||
This matches graphic characters---everything except space,
|
||||
This matches graphic characters---everything except whitespace,
|
||||
@acronym{ASCII} and non-@acronym{ASCII} control characters,
|
||||
surrogates, and codepoints unassigned by Unicode, as indicated by the
|
||||
Unicode @samp{general-category} property (@pxref{Character
|
||||
|
@@ -572,7 +572,7 @@ This matches any multibyte character (@pxref{Text Representations}).
|
|||
@item [:nonascii:]
|
||||
This matches any non-@acronym{ASCII} character.
|
||||
@item [:print:]
|
||||
This matches any printing character---either space, or a graphic
|
||||
This matches any printing character---either whitespace, or a graphic
|
||||
character matched by @samp{[:graph:]}.
|
||||
@item [:punct:]
|
||||
This matches any punctuation character. (At present, for multibyte
|
||||
|
|
|
@@ -965,12 +965,12 @@ CHAR
|
|||
matches space and tab only.
|
||||
|
||||
`graphic', `graph'
|
||||
matches graphic characters--everything except space, ASCII
|
||||
matches graphic characters--everything except whitespace, ASCII
|
||||
and non-ASCII control characters, surrogates, and codepoints
|
||||
unassigned by Unicode.
|
||||
|
||||
`printing', `print'
|
||||
matches space and graphic characters.
|
||||
matches whitespace and graphic characters.
|
||||
|
||||
`alphanumeric', `alnum'
|
||||
matches alphabetic characters and digits. (For multibyte characters,
|
||||
|
|
|
@@ -984,8 +984,7 @@ character is not ASCII nor 8-bit character, an error is signaled. */)
|
|||
|
||||
#ifdef emacs
|
||||
|
||||
/* Return 'true' if C is an alphabetic character as defined by its
|
||||
Unicode properties. */
|
||||
/* Return true if C is an alphabetic character. */
|
||||
bool
|
||||
alphabeticp (int c)
|
||||
{
|
||||
|
@@ -1008,8 +1007,7 @@ alphabeticp (int c)
|
|||
|| gen_cat == UNICODE_CATEGORY_Nl);
|
||||
}
|
||||
|
||||
/* Return 'true' if C is an decimal-number character as defined by its
|
||||
Unicode properties. */
|
||||
/* Return true if C is a decimal-number character. */
|
||||
bool
|
||||
decimalnump (int c)
|
||||
{
|
||||
|
@@ -1022,16 +1020,25 @@ decimalnump (int c)
|
|||
return gen_cat == UNICODE_CATEGORY_Nd;
|
||||
}
|
||||
|
||||
/* Return 'true' if C is a graphic character as defined by its
|
||||
Unicode properties. */
|
||||
/* Return true if C is a graphic character. */
|
||||
bool
|
||||
graphicp (int c)
|
||||
{
|
||||
return c == ' ' || printablep (c);
|
||||
Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c);
|
||||
if (! INTEGERP (category))
|
||||
return false;
|
||||
EMACS_INT gen_cat = XINT (category);
|
||||
|
||||
/* See UTS #18. */
|
||||
return (!(gen_cat == UNICODE_CATEGORY_Zs /* space separator */
|
||||
|| gen_cat == UNICODE_CATEGORY_Zl /* line separator */
|
||||
|| gen_cat == UNICODE_CATEGORY_Zp /* paragraph separator */
|
||||
|| gen_cat == UNICODE_CATEGORY_Cc /* control */
|
||||
|| gen_cat == UNICODE_CATEGORY_Cs /* surrogate */
|
||||
|| gen_cat == UNICODE_CATEGORY_Cn)); /* unassigned */
|
||||
}
|
||||
|
||||
/* Return 'true' if C is a printable character as defined by its
|
||||
Unicode properties. */
|
||||
/* Return true if C is a printable character. */
|
||||
bool
|
||||
printablep (int c)
|
||||
{
|
||||
|
|
|
@@ -313,7 +313,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
|
|||
/* The rest must handle multibyte characters. */
|
||||
|
||||
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
|
||||
? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
|
||||
? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
|
||||
: graphicp (c))
|
||||
|
||||
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
|
||||
|
|
Loading…
Add table
Reference in a new issue