Make [:graph:] act like [:print:] sans space

In POSIX [[:print:]] is equivalent to [ [:graph:]], so change
[:graph:] so that it matches everything that [:print:] does,
except for space.
* doc/lispref/searching.texi (Char Classes):
* etc/NEWS:
* lisp/emacs-lisp/rx.el (rx):
Document [:graph:] to be [:print:] sans ' '.
* src/character.c, src/character.h (graphicp): New function.
* src/regex.c (ISGRAPH) [emacs]: Use it.
(BIT_GRAPH): New macro.
(BIT_PRINT): Increase to 0x200, to make room for BIT_GRAPH.
(re_wctype_to_bit) [! WIDE_CHAR_SUPPORT]:
Return BIT_GRAPH for RECC_GRAPH.
(re_match_2_internal) [emacs]: Use ISGRAPH if BIT_GRAPH,
and ISPRINT if BIT_PRINT.
This commit is contained in:
Paul Eggert 2015-04-15 00:26:32 -07:00
parent 45d75c0b75
commit a122a0276b
6 changed files with 33 additions and 20 deletions

View file

@ -558,8 +558,11 @@ This matches any @acronym{ASCII} control character.
This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
matches any digit, as well as @samp{+} and @samp{-}.
@item [:graph:]
This matches graphic characters---everything except @acronym{ASCII} control
characters, space, and the delete character.
This matches graphic characters---everything except space,
@acronym{ASCII} and non-@acronym{ASCII} control characters,
surrogates, and codepoints unassigned by Unicode, as indicated by the
Unicode @samp{general-category} property (@pxref{Character
Properties}).
@item [:lower:]
This matches any lower-case letter, as determined by the current case
table (@pxref{Case Tables}). If @code{case-fold-search} is
@ -569,11 +572,8 @@ This matches any multibyte character (@pxref{Text Representations}).
@item [:nonascii:]
This matches any non-@acronym{ASCII} character.
@item [:print:]
This matches printing characters---everything except @acronym{ASCII}
and non-@acronym{ASCII} control characters (including the delete
character), surrogates, and codepoints unassigned by Unicode, as
indicated by the Unicode @samp{general-category} property
(@pxref{Character Properties}).
This matches any printing character---either space, or a graphic
character matched by @samp{[:graph:]}.
@item [:punct:]
This matches any punctuation character. (At present, for multibyte
characters, it matches anything that has non-word syntax.)

View file

@ -629,12 +629,12 @@ notifications, if Emacs is compiled with file notification support.
*** gulp.el
+++
** The character class [:print:] in regular expressions
no longer matches any multibyte character. Instead, Emacs now
** The character classes [:graph:] and [:print:] in regular expressions
no longer match every multibyte character. Instead, Emacs now
consults the Unicode character properties to determine which
characters are printable. In particular, surrogates and unassigned
codepoints are now rejected by this class. If you want the old
behavior, use [:multibyte:] instead.
characters are graphic or printable. In particular, surrogates and
unassigned codepoints are now rejected. If you want the old behavior,
use [:multibyte:] instead.
* New Modes and Packages in Emacs 25.1

View file

@ -965,12 +965,12 @@ CHAR
matches space and tab only.
`graphic', `graph'
matches graphic characters--everything except ASCII control chars,
space, and DEL.
matches graphic characters--everything except space, ASCII
and non-ASCII control characters, surrogates, and codepoints
unassigned by Unicode.
`printing', `print'
matches printing characters--everything except ASCII and non-ASCII
control characters, surrogates, and codepoints unassigned by Unicode.
matches space and graphic characters.
`alphanumeric', `alnum'
matches alphabetic characters and digits. (For multibyte characters,

View file

@ -1022,6 +1022,14 @@ decimalnump (int c)
return gen_cat == UNICODE_CATEGORY_Nd;
}
/* Return 'true' if C is a graphic character: a character that
   printablep accepts, other than space.  This keeps [:print:]
   equivalent to space plus [:graph:].  */
bool
graphicp (int c)
{
  /* Space is printable but not graphic, so it must be rejected here
     rather than accepted.  */
  return c != ' ' && printablep (c);
}
/* Return 'true' if C is a printable character as defined by its
Unicode properties. */
bool

View file

@ -662,6 +662,7 @@ extern Lisp_Object string_escape_byte8 (Lisp_Object);
extern bool alphabeticp (int);
extern bool decimalnump (int);
extern bool graphicp (int);
extern bool printablep (int);
/* Return a translation table of id number ID. */

View file

@ -314,7 +314,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
: 1)
: graphicp (c))
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
@ -1875,7 +1875,8 @@ struct range_table_work_area
#define BIT_MULTIBYTE 0x20
#define BIT_ALPHA 0x40
#define BIT_ALNUM 0x80
#define BIT_PRINT 0x100
#define BIT_GRAPH 0x100
#define BIT_PRINT 0x200
/* Set the bit for character C in a list. */
@ -2074,7 +2075,7 @@ re_wctype_to_bit (re_wctype_t cc)
{
switch (cc)
{
case RECC_NONASCII: case RECC_GRAPH:
case RECC_NONASCII:
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
case RECC_ALPHA: return BIT_ALPHA;
case RECC_ALNUM: return BIT_ALNUM;
@ -2083,6 +2084,7 @@ re_wctype_to_bit (re_wctype_t cc)
case RECC_UPPER: return BIT_UPPER;
case RECC_PUNCT: return BIT_PUNCT;
case RECC_SPACE: return BIT_SPACE;
case RECC_GRAPH: return BIT_GRAPH;
case RECC_PRINT: return BIT_PRINT;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
@ -5522,7 +5524,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1,
| (class_bits & BIT_UPPER && ISUPPER (c))
| (class_bits & BIT_WORD && ISWORD (c))
| (class_bits & BIT_ALPHA && ISALPHA (c))
| (class_bits & BIT_ALNUM && ISALNUM (c)))
| (class_bits & BIT_ALNUM && ISALNUM (c))
| (class_bits & BIT_GRAPH && ISGRAPH (c))
| (class_bits & BIT_PRINT && ISPRINT (c)))
not = !not;
else
CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);