Warning comments about subtleties of fetching characters from buffers/strings.
src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR): src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments about subtle differences between FETCH_CHAR* and STRING_CHAR* macros related to unification of CJK characters. For the details, see the discussion following the message here: http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
This commit is contained in:
parent
ea0ff31442
commit
2f8e16b2a3
3 changed files with 34 additions and 4 deletions
|
@ -1,3 +1,12 @@
|
|||
2012-04-06 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
|
||||
* character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
|
||||
about subtle differences between FETCH_CHAR* and STRING_CHAR*
|
||||
macros related to unification of CJK characters. For the details,
|
||||
see the discussion following the message here:
|
||||
http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
|
||||
|
||||
2012-04-04 Chong Yidong <cyd@gnu.org>
|
||||
|
||||
* keyboard.c (Vdelayed_warnings_list): Doc fix.
|
||||
|
|
15
src/buffer.h
15
src/buffer.h
|
@ -343,7 +343,8 @@ while (0)
|
|||
- (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \
|
||||
+ BEG_BYTE)
|
||||
|
||||
/* Return character at byte position POS. */
|
||||
/* Return character at byte position POS. See the caveat WARNING for
|
||||
FETCH_MULTIBYTE_CHAR below. */
|
||||
|
||||
#define FETCH_CHAR(pos) \
|
||||
(!NILP (BVAR (current_buffer, enable_multibyte_characters)) \
|
||||
|
@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p;
|
|||
|
||||
/* Return character code of multi-byte form at byte position POS. If POS
|
||||
doesn't point to the head of a valid multi-byte form, only the byte at
|
||||
POS is returned. No range checking. */
|
||||
POS is returned. No range checking.
|
||||
|
||||
WARNING: The character returned by this macro could be "unified"
|
||||
inside STRING_CHAR, if the original character in the buffer belongs
|
||||
to one of the Private Use Areas (PUAs) of codepoints that Emacs
|
||||
uses to support non-unified CJK characters. If that happens,
|
||||
CHAR_BYTES will return a value that is different from the length of
|
||||
the original multibyte sequence stored in the buffer. Therefore,
|
||||
do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through
|
||||
the buffer to the next character after fetching this one. Instead,
|
||||
use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH. */
|
||||
|
||||
#define FETCH_MULTIBYTE_CHAR(pos) \
|
||||
(_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \
|
||||
|
|
|
@ -292,7 +292,9 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
|
|||
} while (0)
|
||||
|
||||
/* Return the character code of character whose multibyte form is at
|
||||
P. */
|
||||
P. Note that this macro unifies CJK characters whose codepoints
|
||||
are in the Private Use Areas (PUAs), so it might return a different
|
||||
codepoint from the one actually stored at P. */
|
||||
|
||||
#define STRING_CHAR(p) \
|
||||
(!((p)[0] & 0x80) \
|
||||
|
@ -309,7 +311,15 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
|
|||
|
||||
|
||||
/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
|
||||
form. */
|
||||
form.
|
||||
|
||||
Note: This macro returns the actual length of the character's
|
||||
multibyte sequence as it is stored in a buffer or string. The
|
||||
character it returns might have a different codepoint that has a
|
||||
different multibyte sequence of a different length, due to possible
|
||||
unification of CJK characters inside string_char. Therefore do NOT
|
||||
assume that the length returned by this macro is identical to the
|
||||
length of the multibyte sequence of the character it returns. */
|
||||
|
||||
#define STRING_CHAR_AND_LENGTH(p, actual_len) \
|
||||
(!((p)[0] & 0x80) \
|
||||
|
|
Loading…
Add table
Reference in a new issue