Make [:graph:] act like [:print:] sans space
In POSIX [[:print:]] is equivalent to [ [:graph:]], so change [:graph:] so that it matches everything that [:print:] does, except for space. * doc/lispref/searching.texi (Char Classes): * etc/NEWS: * lisp/emacs-lisp/rx.el (rx): Document [:graph:] to be [:print:] sans ' '. * src/character.c, src/character.h (graphicp): New function. * src/regex.c (ISGRAPH) [emacs]: Use it. (BIT_GRAPH): New macro. (BIT_PRINT): Increase to 0x200, to make room for BIT_GRAPH. (re_wctype_to_bit) [! WIDE_CHAR_SUPPORT]: Return BIT_GRAPH for RECC_GRAPH. (re_match_2_internal) [emacs]: Use ISGRAPH if BIT_GRAPH, and ISPRINT if BIT_PRINT.
This commit is contained in:
parent
45d75c0b75
commit
a122a0276b
6 changed files with 33 additions and 20 deletions
|
@ -558,8 +558,11 @@ This matches any @acronym{ASCII} control character.
|
|||
This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
|
||||
matches any digit, as well as @samp{+} and @samp{-}.
|
||||
@item [:graph:]
|
||||
This matches graphic characters---everything except @acronym{ASCII} control
|
||||
characters, space, and the delete character.
|
||||
This matches graphic characters---everything except space,
|
||||
@acronym{ASCII} and non-@acronym{ASCII} control characters,
|
||||
surrogates, and codepoints unassigned by Unicode, as indicated by the
|
||||
Unicode @samp{general-category} property (@pxref{Character
|
||||
Properties}).
|
||||
@item [:lower:]
|
||||
This matches any lower-case letter, as determined by the current case
|
||||
table (@pxref{Case Tables}). If @code{case-fold-search} is
|
||||
|
@ -569,11 +572,8 @@ This matches any multibyte character (@pxref{Text Representations}).
|
|||
@item [:nonascii:]
|
||||
This matches any non-@acronym{ASCII} character.
|
||||
@item [:print:]
|
||||
This matches printing characters---everything except @acronym{ASCII}
|
||||
and non-@acronym{ASCII} control characters (including the delete
|
||||
character), surrogates, and codepoints unassigned by Unicode, as
|
||||
indicated by the Unicode @samp{general-category} property
|
||||
(@pxref{Character Properties}).
|
||||
This matches any printing character---either space, or a graphic
|
||||
character matched by @samp{[:graph:]}.
|
||||
@item [:punct:]
|
||||
This matches any punctuation character. (At present, for multibyte
|
||||
characters, it matches anything that has non-word syntax.)
|
||||
|
|
10
etc/NEWS
10
etc/NEWS
|
@ -629,12 +629,12 @@ notifications, if Emacs is compiled with file notification support.
|
|||
*** gulp.el
|
||||
|
||||
+++
|
||||
** The character class [:print:] in regular expressions
|
||||
no longer matches any multibyte character. Instead, Emacs now
|
||||
** The character classes [:graph:] and [:print:] in regular expressions
|
||||
no longer match every multibyte character. Instead, Emacs now
|
||||
consults the Unicode character properties to determine which
|
||||
characters are printable. In particular, surrogates and unassigned
|
||||
codepoints are now rejected by this class. If you want the old
|
||||
behavior, use [:multibyte:] instead.
|
||||
characters are graphic or printable. In particular, surrogates and
|
||||
unassigned codepoints are now rejected. If you want the old behavior,
|
||||
use [:multibyte:] instead.
|
||||
|
||||
|
||||
* New Modes and Packages in Emacs 25.1
|
||||
|
|
|
@ -965,12 +965,12 @@ CHAR
|
|||
matches space and tab only.
|
||||
|
||||
`graphic', `graph'
|
||||
matches graphic characters--everything except ASCII control chars,
|
||||
space, and DEL.
|
||||
matches graphic characters--everything except space, ASCII
|
||||
and non-ASCII control characters, surrogates, and codepoints
|
||||
unassigned by Unicode.
|
||||
|
||||
`printing', `print'
|
||||
matches printing characters--everything except ASCII and non-ASCII
|
||||
control characters, surrogates, and codepoints unassigned by Unicode.
|
||||
matches space and graphic characters.
|
||||
|
||||
`alphanumeric', `alnum'
|
||||
matches alphabetic characters and digits. (For multibyte characters,
|
||||
|
|
|
@ -1022,6 +1022,14 @@ decimalnump (int c)
|
|||
return gen_cat == UNICODE_CATEGORY_Nd;
|
||||
}
|
||||
|
||||
/* Return 'true' if C is a graphic character as defined by its
   Unicode properties.  A graphic character is any printable
   character (see printablep) other than space, so that, as in
   POSIX, [[:print:]] is equivalent to [ [:graph:]].  */
bool
graphicp (int c)
{
  /* Space is printable but not graphic, so it must be rejected here
     rather than accepted.  */
  return c != ' ' && printablep (c);
}
|
||||
|
||||
/* Return 'true' if C is a printable character as defined by its
|
||||
Unicode properties. */
|
||||
bool
|
||||
|
|
|
@ -662,6 +662,7 @@ extern Lisp_Object string_escape_byte8 (Lisp_Object);
|
|||
|
||||
extern bool alphabeticp (int);
|
||||
extern bool decimalnump (int);
|
||||
extern bool graphicp (int);
|
||||
extern bool printablep (int);
|
||||
|
||||
/* Return a translation table of id number ID. */
|
||||
|
|
12
src/regex.c
12
src/regex.c
|
@ -314,7 +314,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
|
|||
|
||||
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
|
||||
? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
|
||||
: 1)
|
||||
: graphicp (c))
|
||||
|
||||
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
|
||||
? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
|
||||
|
@ -1875,7 +1875,8 @@ struct range_table_work_area
|
|||
#define BIT_MULTIBYTE 0x20
|
||||
#define BIT_ALPHA 0x40
|
||||
#define BIT_ALNUM 0x80
|
||||
#define BIT_PRINT 0x100
|
||||
#define BIT_GRAPH 0x100
|
||||
#define BIT_PRINT 0x200
|
||||
|
||||
|
||||
/* Set the bit for character C in a list. */
|
||||
|
@ -2074,7 +2075,7 @@ re_wctype_to_bit (re_wctype_t cc)
|
|||
{
|
||||
switch (cc)
|
||||
{
|
||||
case RECC_NONASCII: case RECC_GRAPH:
|
||||
case RECC_NONASCII:
|
||||
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
|
||||
case RECC_ALPHA: return BIT_ALPHA;
|
||||
case RECC_ALNUM: return BIT_ALNUM;
|
||||
|
@ -2083,6 +2084,7 @@ re_wctype_to_bit (re_wctype_t cc)
|
|||
case RECC_UPPER: return BIT_UPPER;
|
||||
case RECC_PUNCT: return BIT_PUNCT;
|
||||
case RECC_SPACE: return BIT_SPACE;
|
||||
case RECC_GRAPH: return BIT_GRAPH;
|
||||
case RECC_PRINT: return BIT_PRINT;
|
||||
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
|
||||
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
|
||||
|
@ -5522,7 +5524,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1,
|
|||
| (class_bits & BIT_UPPER && ISUPPER (c))
|
||||
| (class_bits & BIT_WORD && ISWORD (c))
|
||||
| (class_bits & BIT_ALPHA && ISALPHA (c))
|
||||
| (class_bits & BIT_ALNUM && ISALNUM (c)))
|
||||
| (class_bits & BIT_ALNUM && ISALNUM (c))
|
||||
| (class_bits & BIT_GRAPH && ISGRAPH (c))
|
||||
| (class_bits & BIT_PRINT && ISPRINT (c)))
|
||||
not = !not;
|
||||
else
|
||||
CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
|
||||
|
|
Loading…
Add table
Reference in a new issue