(ENCODE_ISO_CHARACTER_DIMENSION1): Pay attention to

CODING_FLAG_ISO_SAFE.
(ENCODE_ISO_CHARACTER_DIMENSION2): Likewise.
(safe_terminal_coding): New variable.
(Fset_safe_terminal_coding_system_internal): New function.
(init_coding_once): Initialize safe_terminal_coding.
(syms_of_coding): Declare set-safe-terminal-coding-system-internal as a
Lisp function.
(Vmicrosoft_code_table): New variable.
(syms_of_coding): Declare it as a Lisp variable and initialize it.
(detect_coding_mask): Pay attention to Vmicrosoft_code_table.
This commit is contained in:
Kenichi Handa 1997-08-10 04:13:19 +00:00
parent fbaa2ed9b5
commit c482535825

View file

@ -278,13 +278,21 @@ Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O. */
Lisp_Object Vlast_coding_system_used;
/* A vector of length 256 which contains information about special
Microsoft codes. */
Lisp_Object Vmicrosoft_code_table;
/* Flag to inhibit code conversion of end-of-line format. */
int inhibit_eol_conversion;
/* Coding-system of what terminal accept for displaying. */
/* Coding system to be used to encode text for terminal display. */
struct coding_system terminal_coding;
/* Coding-system of what is sent from terminal keyboard. */
/* Coding system to be used to encode text for terminal display when
terminal coding system is nil. */
struct coding_system safe_terminal_coding;
/* Coding system of what is sent from terminal keyboard. */
struct coding_system keyboard_coding;
Lisp_Object Vfile_coding_system_alist;
@ -681,7 +689,16 @@ detect_coding_iso2022 (src, src_end)
if (c < 0x80)
break;
else if (c < 0xA0)
return 0;
{
if (VECTORP (Vmicrosoft_code_table)
&& !NILP (XVECTOR (Vmicrosoft_code_table)->contents[c]))
{
mask &= ~(CODING_CATEGORY_MASK_ISO_7
| CODING_CATEGORY_MASK_ISO_7_ELSE);
break;
}
return 0;
}
else
{
unsigned char *src_begin = src;
@ -1165,66 +1182,88 @@ decode_coding_iso2022 (coding, source, destination,
sequences are also produced in advance if necessary. */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
do { \
if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
{ \
if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
*dst++ = c1 & 0x7F; \
else \
*dst++ = c1 | 0x80; \
CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
break; \
} \
else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
{ \
*dst++ = c1 & 0x7F; \
break; \
} \
else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
{ \
*dst++ = c1 | 0x80; \
break; \
} \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
register. Then repeat the loop to actually produce the \
character. */ \
dst = encode_invocation_designation (charset, coding, dst); \
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   This macro is not self-contained: it reads and updates the variables
   `coding' (pointer to the coding system in use) and `dst' (output
   cursor) of the expansion site.  CHARSET is evaluated several times,
   so it must be free of side effects; C1 is evaluated exactly once, in
   the branch that finally emits the byte.  Both parameters are
   parenthesized in the expansion so that expression arguments keep
   their intended grouping.  */
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
  do { \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
      { \
        /* A single-shift is pending: emit the byte in the form the \
           coding system's bit width calls for, then clear the \
           pending state.  */ \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
          *dst++ = (c1) & 0x7F; \
        else \
          *dst++ = (c1) | 0x80; \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
        break; \
      } \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
      { \
        /* CHARSET is the charset of graphic plane 0: high bit off.  */ \
        *dst++ = (c1) & 0x7F; \
        break; \
      } \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
      { \
        /* CHARSET is the charset of graphic plane 1: high bit on.  */ \
        *dst++ = (c1) | 0x80; \
        break; \
      } \
    else if ((coding->flags & CODING_FLAG_ISO_SAFE) \
             && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) \
                 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) \
      { \
        /* We should not encode this character, instead produce one or \
           two `?'s (one substitution byte per display column).  */ \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
        if (CHARSET_WIDTH (charset) == 2) \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
        break; \
      } \
    else \
      { \
        /* Since CHARSET is not yet invoked to any graphic planes, we \
           must invoke it, or, at first, designate it to some graphic \
           register.  Then repeat the loop to actually produce the \
           character.  */ \
        dst = encode_invocation_designation (charset, coding, dst); \
      } \
  } while (1)
/* Produce codes for a DIMENSION2 character whose character set is
CHARSET and whose position-codes are C1 and C2. Designation and
invocation codes are also produced in advance if necessary. */
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
do { \
if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
{ \
if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
*dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F; \
else \
*dst++ = c1 | 0x80, *dst++ = c2 | 0x80; \
CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
break; \
} \
else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
{ \
*dst++ = c1 & 0x7F, *dst++= c2 & 0x7F; \
break; \
} \
else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
{ \
*dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
break; \
} \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
register. Then repeat the loop to actually produce the \
character. */ \
dst = encode_invocation_designation (charset, coding, dst); \
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   This macro is not self-contained: it reads and updates the variables
   `coding' (pointer to the coding system in use) and `dst' (output
   cursor) of the expansion site.  CHARSET is evaluated several times,
   so it must be free of side effects; C1 and C2 are each evaluated
   exactly once, in the branch that finally emits the two bytes.  All
   parameters are parenthesized in the expansion so that expression
   arguments keep their intended grouping.  */
#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2) \
  do { \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
      { \
        /* A single-shift is pending: emit both bytes in the form the \
           coding system's bit width calls for, then clear the \
           pending state.  */ \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
          { \
            *dst++ = (c1) & 0x7F; \
            *dst++ = (c2) & 0x7F; \
          } \
        else \
          { \
            *dst++ = (c1) | 0x80; \
            *dst++ = (c2) | 0x80; \
          } \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; \
        break; \
      } \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0)) \
      { \
        /* CHARSET is the charset of graphic plane 0: high bits off.  */ \
        *dst++ = (c1) & 0x7F; \
        *dst++ = (c2) & 0x7F; \
        break; \
      } \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1)) \
      { \
        /* CHARSET is the charset of graphic plane 1: high bits on.  */ \
        *dst++ = (c1) | 0x80; \
        *dst++ = (c2) | 0x80; \
        break; \
      } \
    else if ((coding->flags & CODING_FLAG_ISO_SAFE) \
             && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) \
                 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) \
      { \
        /* We should not encode this character, instead produce one or \
           two `?'s (one substitution byte per display column).  */ \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
        if (CHARSET_WIDTH (charset) == 2) \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
        break; \
      } \
    else \
      { \
        /* Since CHARSET is not yet invoked to any graphic planes, we \
           must invoke it, or, at first, designate it to some graphic \
           register.  Then repeat the loop to actually produce the \
           character.  */ \
        dst = encode_invocation_designation (charset, coding, dst); \
      } \
  } while (1)
#define ENCODE_ISO_CHARACTER(charset, c1, c2) \
@ -2331,7 +2370,9 @@ setup_coding_system (coding_system, coding)
| (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
| (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
| (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
| (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
| (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
| (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
);
/* Invoke graphic register 0 to plane 0. */
CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
@ -2415,34 +2456,35 @@ setup_coding_system (coding_system, coding)
default_reg_bits &= 3;
}
for (charset = 0; charset <= MAX_CHARSET; charset++)
if (CHARSET_VALID_P (charset)
&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
== CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
{
/* We have not yet decided where to designate CHARSET. */
int reg_bits = default_reg_bits;
if (! (coding->flags & CODING_FLAG_ISO_SAFE))
for (charset = 0; charset <= MAX_CHARSET; charset++)
if (CHARSET_VALID_P (charset)
&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
== CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
{
/* We have not yet decided where to designate CHARSET. */
int reg_bits = default_reg_bits;
if (CHARSET_CHARS (charset) == 96)
/* A charset of CHARS96 can't be designated to REG 0. */
reg_bits &= ~1;
if (CHARSET_CHARS (charset) == 96)
/* A charset of CHARS96 can't be designated to REG 0. */
reg_bits &= ~1;
if (reg_bits)
/* There exist some default graphic register. */
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= (reg_bits & 1
? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
else
/* We anyway have to designate CHARSET to somewhere. */
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= (CHARSET_CHARS (charset) == 94
? 0
: ((coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT
|| ! coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
? 1
: (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT
? 2 : 0)));
}
if (reg_bits)
/* There exist some default graphic register. */
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= (reg_bits & 1
? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
else
/* We anyway have to designate CHARSET to somewhere. */
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= (CHARSET_CHARS (charset) == 94
? 0
: ((coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT
|| ! coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
? 1
: (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT
? 2 : 0)));
}
}
coding->require_flushing = 1;
break;
@ -2608,34 +2650,33 @@ detect_coding_mask (src, src_bytes)
/* No valid ISO2022 code follows C. Try again. */
goto label_loop_detect_coding;
}
else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
/* C is an ISO2022 specific control code of C1,
or the first byte of SJIS's 2-byte character code,
or a leading code of Emacs. */
mask = (detect_coding_iso2022 (src, src_end)
| detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
else if (c == ISO_CODE_CSI
&& (src < src_end
&& (*src == ']'
|| (src + 1 < src_end
&& src[1] == ']'
&& (*src == '0' || *src == '1' || *src == '2')))))
/* C is an ISO2022's control-sequence-introducer. */
mask = (detect_coding_iso2022 (src, src_end)
| detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
else if (c < 0xA0)
/* C is the first byte of SJIS character code,
or a leading-code of Emacs. */
mask = (detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
{
/* If C is a special Microsoft code,
or is an ISO2022 specific control code of C1 (SS2 or SS3),
or is an ISO2022 control-sequence-introducer (CSI),
we should also consider the possibility of some ISO2022 codings. */
if ((VECTORP (Vmicrosoft_code_table)
&& !NILP (XVECTOR (Vmicrosoft_code_table)->contents[c]))
|| (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
|| (c == ISO_CODE_CSI
&& (src < src_end
&& (*src == ']'
|| (src + 1 < src_end
&& src[1] == ']'
&& (*src == '0' || *src == '1' || *src == '2'))))))
mask = (detect_coding_iso2022 (src, src_end)
| detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
else
/* C is the first byte of SJIS character code, or a
leading-code of Emacs. */
mask = (detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
}
else
/* C is a character of ISO2022 in graphic plane right,
or a SJIS's 1-byte character code (i.e. JISX0201),
@ -3547,6 +3588,18 @@ DEFUN ("set-terminal-coding-system-internal",
return Qnil;
}
/* Lisp primitive `set-safe-terminal-coding-system-internal'.
   Takes exactly one argument, CODING_SYSTEM, which must be a symbol.
   The value returned by Fcheck_coding_system for it is used to set up
   the global `safe_terminal_coding'.  Always returns nil.
   NOTE(review): Fcheck_coding_system presumably signals an error for
   a symbol that does not name a coding system -- confirm.  */
DEFUN ("set-safe-terminal-coding-system-internal",
       Fset_safe_terminal_coding_system_internal,
       Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
  (coding_system)
     Lisp_Object coding_system;
{
  Lisp_Object checked_system;

  /* Reject non-symbol arguments before doing anything else.  */
  CHECK_SYMBOL (coding_system, 0);

  checked_system = Fcheck_coding_system (coding_system);
  setup_coding_system (checked_system, &safe_terminal_coding);

  return Qnil;
}
DEFUN ("terminal-coding-system",
Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
"Return coding-system of your terminal.")
@ -3710,6 +3763,7 @@ init_coding_once ()
setup_coding_system (Qnil, &keyboard_coding);
setup_coding_system (Qnil, &terminal_coding);
setup_coding_system (Qnil, &safe_terminal_coding);
#if defined (MSDOS) || defined (WINDOWSNT)
system_eol_type = CODING_EOL_CRLF;
@ -3824,6 +3878,7 @@ syms_of_coding ()
defsubr (&Sdecode_big5_char);
defsubr (&Sencode_big5_char);
defsubr (&Sset_terminal_coding_system_internal);
defsubr (&Sset_safe_terminal_coding_system_internal);
defsubr (&Sterminal_coding_system);
defsubr (&Sset_keyboard_coding_system_internal);
defsubr (&Skeyboard_coding_system);
@ -3954,6 +4009,16 @@ designate it with the escape sequence identifing revision (cdr part of the eleme
The car part is used for decoding a process output,\n\
the cdr part is used for encoding a text to be sent to a process.");
Vdefault_process_coding_system = Qnil;
DEFVAR_LISP ("special-microsoft-code-table", &Vmicrosoft_code_table,
"Table of special Microsoft codes in the range 128..159 (inclusive).\n\
This is a vector of length 256.\n\
If Nth element is non-nil, the existence of code N in a file\n\
(or output of subprocess) doesn't prevent it to be detected as\n\
a coding system of ISO 2022 variant (e.g. iso-latin-1) on reading a file\n\
or reading output of a subprocess.\n\
Only 128th through 159th elements has a meaning.");
Vmicrosoft_code_table = Fmake_vector (make_number (256), Qnil);
}
#endif /* emacs */