(create_process, Fopen_network_stream): Typo in indexes

of array proc_encode_coding_system fixed.
Remove prefix "coding-system-" from coding system symbol names.
(encode_coding) : Fix typo ("=" -> "==").
(detect_coding_iso2022): Detect coding-category-iso-8-2
more precisely.
(ENCODE_RESET_PLANE_AND_REGISTER): Argument `eol' is
deleted.  Don't call ENCODE_DESIGNATION if nothing designated
initially.
(encode_designation_at_bol) New function.
(encode_coding_iso2022): Handle CODING_FLAG_ISO_INIT_AT_BOL and
CODING_FLAG_ISO_DESIGNATE_AT_BOL.
(setup_coding_system): Now, flags of ISO2022 coding
systems contains charsets instead of charset IDs.
(detect_coding_iso2022, decode_coding_iso2022): Make the code
robust against invalid SI and SO.
(Ffind_coding_system, syms_of_coding): Escape newlines in docstring.
(setup_coding_system): Correct setting coding->symbol
and coding->eol_type.  The performance improved.
This commit is contained in:
Kenichi Handa 1997-02-27 11:10:42 +00:00
parent 8ddb35b2ab
commit e0e989f659

View file

@ -581,45 +581,43 @@ int
detect_coding_iso2022 (src, src_end)
unsigned char *src, *src_end;
{
unsigned char graphic_register[4];
unsigned char c, esc_cntl;
unsigned char c, g1 = 0;
int mask = (CODING_CATEGORY_MASK_ISO_7
| CODING_CATEGORY_MASK_ISO_8_1
| CODING_CATEGORY_MASK_ISO_8_2);
/* We may look ahead maximum 3 bytes. */
unsigned char *adjusted_src_end = src_end - 3;
/* We may look ahead at most 4 bytes. */
unsigned char *adjusted_src_end = src_end - 4;
int i;
for (i = 0; i < 4; i++)
graphic_register[i] = CHARSET_ASCII;
while (src < adjusted_src_end)
while (src < src_end)
{
c = *src++;
switch (c)
{
case ISO_CODE_ESC:
if (src >= adjusted_src_end)
if (src >= src_end)
break;
c = *src++;
if (c == '$')
if (src + 2 >= src_end
&& ((c >= '(' && c <= '/')
|| c == '$' && ((*src >= '(' && *src <= '/')
|| (*src >= '@' && *src <= 'B'))))
{
/* Designation of 2-byte character set. */
if (src >= adjusted_src_end)
break;
c = *src++;
/* Valid designation sequence. */
if (c == ')' || (c == '$' && *src == ')'))
g1 = 1;
src++;
break;
}
if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/'))
/* Designation to graphic register 1, 2, or 3. */
mask &= ~CODING_CATEGORY_MASK_ISO_7;
else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
return CODING_CATEGORY_MASK_ISO_ELSE;
break;
case ISO_CODE_SI:
case ISO_CODE_SO:
return CODING_CATEGORY_MASK_ISO_ELSE;
if (g1)
return CODING_CATEGORY_MASK_ISO_ELSE;
break;
case ISO_CODE_CSI:
case ISO_CODE_SS2:
case ISO_CODE_SS3:
@ -636,9 +634,9 @@ detect_coding_iso2022 (src, src_end)
int count = 1;
mask &= ~CODING_CATEGORY_MASK_ISO_7;
while (src < adjusted_src_end && *src >= 0xA0)
while (src < src_end && *src >= 0xA0)
count++, src++;
if (count & 1 && src < adjusted_src_end)
if (count & 1 && src < src_end)
mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
}
break;
@ -794,6 +792,8 @@ decode_coding_iso2022 (coding, source, destination,
break;
case ISO_shift_out:
if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
goto label_invalid_escape_sequence;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
break;
@ -830,14 +830,10 @@ decode_coding_iso2022 (coding, source, destination,
case '&': /* revision of following character set */
ONE_MORE_BYTE (c1);
if (!(c1 >= '@' && c1 <= '~'))
{
goto label_invalid_escape_sequence;
}
goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
if (c1 != ISO_CODE_ESC)
{
goto label_invalid_escape_sequence;
}
goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
goto label_escape_sequence;
@ -859,26 +855,34 @@ decode_coding_iso2022 (coding, source, destination,
DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
}
else
{
goto label_invalid_escape_sequence;
}
goto label_invalid_escape_sequence;
break;
case 'n': /* invocation of locking-shift-2 */
if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
goto label_invalid_escape_sequence;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
break;
case 'o': /* invocation of locking-shift-3 */
if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
goto label_invalid_escape_sequence;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
break;
case 'N': /* invocation of single-shift-2 */
if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
DECODE_ISO_CHARACTER (charset, c1);
break;
case 'O': /* invocation of single-shift-3 */
if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
DECODE_ISO_CHARACTER (charset, c1);
@ -1246,24 +1250,63 @@ encode_invocation_designation (charset, coding, dst)
/* Produce codes for designation and invocation to reset the graphic
planes and registers to initial state. */
#define ENCODE_RESET_PLANE_AND_REGISTER(eol) \
do { \
int reg; \
if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
ENCODE_SHIFT_IN; \
for (reg = 0; reg < 4; reg++) \
{ \
if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0) \
{ \
if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1; \
} \
else if (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
!= CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)) \
ENCODE_DESIGNATION \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
} \
#define ENCODE_RESET_PLANE_AND_REGISTER \
do { \
int reg; \
if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
ENCODE_SHIFT_IN; \
for (reg = 0; reg < 4; reg++) \
if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
&& (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
!= CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
ENCODE_DESIGNATION \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
} while (0)
int
encode_designation_at_bol (coding, src, src_end, dstp)
struct coding_system *coding;
unsigned char *src, *src_end, **dstp;
{
int charset, reg, r[4];
unsigned char *dst = *dstp, c;
for (reg = 0; reg < 4; reg++) r[reg] = -1;
while (src < src_end && (c = *src++) != '\n')
{
switch (emacs_code_class[c])
{
case EMACS_ascii_code:
charset = CHARSET_ASCII;
break;
case EMACS_leading_code_2:
if (++src >= src_end) continue;
charset = c;
break;
case EMACS_leading_code_3:
if ((src += 2) >= src_end) continue;
charset = (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
break;
case EMACS_leading_code_4:
if ((src += 3) >= src_end) continue;
charset = *(src - 3);
break;
default:
continue;
}
reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
if (r[reg] < 0
&& CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)
r[reg] = charset;
}
if (c != '\n' && !coding->last_block)
return -1;
for (reg = 0; reg < 4; reg++)
if (r[reg] >= 0)
ENCODE_DESIGNATION (r[reg], reg, coding);
*dstp = dst;
return 0;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
int
@ -1278,10 +1321,10 @@ encode_coding_iso2022 (coding, source, destination,
unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
/* Since the maximum bytes produced by each loop is 6, we subtract 5
/* Since the maximum bytes produced by each loop is 20, we subtract 19
from DST_END to assure overflow checking is necessary only at the
head of loop. */
unsigned char *adjusted_dst_end = dst_end - 5;
unsigned char *adjusted_dst_end = dst_end - 19;
while (src < src_end && dst < adjusted_dst_end)
{
@ -1291,9 +1334,22 @@ encode_coding_iso2022 (coding, source, destination,
TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
reset to SRC_BASE before exiting. */
unsigned char *src_base = src;
unsigned char c1 = *src++, c2, c3, c4;
unsigned char c1, c2, c3, c4;
int charset;
if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
&& CODING_SPEC_ISO_BOL (coding))
{
/* We have to produce destination sequences now. */
if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)
/* We can't find end of line in the current block. Let's
repeat encoding starting from the current position
pointed by SRC. */
break;
CODING_SPEC_ISO_BOL (coding) = 0;
}
c1 = *src++;
/* If we are seeing a component of a composite character, we are
seeing a leading-code specially encoded for composition, or a
composition rule if composing with rule. We must set C1
@ -1339,7 +1395,7 @@ encode_coding_iso2022 (coding, source, destination,
case EMACS_control_code:
if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
ENCODE_RESET_PLANE_AND_REGISTER (0);
ENCODE_RESET_PLANE_AND_REGISTER;
*dst++ = c1;
break;
@ -1347,7 +1403,7 @@ encode_coding_iso2022 (coding, source, destination,
if (!coding->selective)
{
if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
ENCODE_RESET_PLANE_AND_REGISTER (0);
ENCODE_RESET_PLANE_AND_REGISTER;
*dst++ = c1;
break;
}
@ -1355,7 +1411,11 @@ encode_coding_iso2022 (coding, source, destination,
case EMACS_linefeed_code:
if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
ENCODE_RESET_PLANE_AND_REGISTER (1);
ENCODE_RESET_PLANE_AND_REGISTER;
if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
bcopy (coding->spec.iso2022.initial_designation,
coding->spec.iso2022.current_designation,
sizeof coding->spec.iso2022.initial_designation);
if (coding->eol_type == CODING_EOL_LF
|| coding->eol_type == CODING_EOL_AUTOMATIC)
*dst++ = ISO_CODE_LF;
@ -1363,6 +1423,7 @@ encode_coding_iso2022 (coding, source, destination,
*dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
else
*dst++ = ISO_CODE_CR;
CODING_SPEC_ISO_BOL (coding) = 1;
break;
case EMACS_leading_code_2:
@ -1418,7 +1479,7 @@ encode_coding_iso2022 (coding, source, destination,
the text although they are not valid characters. */
if (coding->last_block)
{
ENCODE_RESET_PLANE_AND_REGISTER (1);
ENCODE_RESET_PLANE_AND_REGISTER;
bcopy(src, dst, src_end - src);
dst += (src_end - src);
src = src_end;
@ -1985,11 +2046,10 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
return 0. */
int
setup_coding_system (coding_system_symbol, coding)
Lisp_Object coding_system_symbol;
setup_coding_system (coding_system, coding)
Lisp_Object coding_system;
struct coding_system *coding;
{
Lisp_Object coding_system_vector = Qnil;
Lisp_Object type, eol_type;
/* At first, set several fields default values. */
@ -1999,44 +2059,28 @@ setup_coding_system (coding_system_symbol, coding)
coding->composing = 0;
coding->direction = 0;
coding->carryover_size = 0;
coding->symbol = Qnil;
coding->post_read_conversion = coding->pre_write_conversion = Qnil;
/* Get value of property `coding-system'. If it is a Lisp symbol
pointing another coding system, fetch its property until we get a
vector. */
while (!NILP (coding_system_symbol))
{
coding->symbol = coding_system_symbol;
if (NILP (coding->post_read_conversion))
coding->post_read_conversion = Fget (coding_system_symbol,
Qpost_read_conversion);
if (NILP (coding->pre_write_conversion))
coding->pre_write_conversion = Fget (coding_system_symbol,
Qpre_write_conversion);
coding_system_vector = Fget (coding_system_symbol, Qcoding_system);
if (VECTORP (coding_system_vector))
break;
coding_system_symbol = coding_system_vector;
}
Vlast_coding_system_used = coding->symbol;
if (!VECTORP (coding_system_vector)
|| XVECTOR (coding_system_vector)->size != 5)
goto label_invalid_coding_system;
/* Get value of property `eol-type' by searching from the root
coding-system. */
coding_system_symbol = coding->symbol;
Vlast_coding_system_used = coding->symbol = coding_system;
eol_type = Qnil;
while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol))
/* Get value of property `coding-system' until we get a vector.
While doing that, also get values of properties
`post-read-conversion', `pre-write-conversion', and `eol-type'. */
while (!NILP (coding_system) && SYMBOLP (coding_system))
{
eol_type = Fget (coding_system_symbol, Qeol_type);
if (!NILP (eol_type))
break;
coding_system_symbol = Fget (coding_system_symbol, Qcoding_system);
if (NILP (coding->post_read_conversion))
coding->post_read_conversion = Fget (coding_system,
Qpost_read_conversion);
if (NILP (coding->pre_write_conversion))
coding->pre_write_conversion = Fget (coding_system,
Qpre_write_conversion);
if (NILP (eol_type))
eol_type = Fget (coding_system, Qeol_type);
coding_system = Fget (coding_system, Qcoding_system);
}
if (!VECTORP (coding_system)
|| XVECTOR (coding_system)->size != 5)
goto label_invalid_coding_system;
if (VECTORP (eol_type))
coding->eol_type = CODING_EOL_AUTOMATIC;
@ -2047,7 +2091,7 @@ setup_coding_system (coding_system_symbol, coding)
else
coding->eol_type = CODING_EOL_LF;
type = XVECTOR (coding_system_vector)->contents[0];
type = XVECTOR (coding_system)->contents[0];
switch (XFASTINT (type))
{
case 0:
@ -2061,7 +2105,7 @@ setup_coding_system (coding_system_symbol, coding)
case 2:
coding->type = coding_type_iso2022;
{
Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
Lisp_Object val = XVECTOR (coding_system)->contents[4];
Lisp_Object *flags;
int i, charset, default_reg_bits = 0;
@ -2078,7 +2122,9 @@ setup_coding_system (coding_system_symbol, coding)
| (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
| (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
| (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
| (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION));
| (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
| (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
| (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
/* Invoke graphic register 0 to plane 0. */
CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
@ -2087,6 +2133,8 @@ setup_coding_system (coding_system_symbol, coding)
= (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
/* Not single shifting at first. */
CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
/* Beginning of buffer should also be regarded as bol. */
CODING_SPEC_ISO_BOL(coding) = 1;
/* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
FLAGS[REG] can be one of below:
@ -2103,7 +2151,8 @@ setup_coding_system (coding_system_symbol, coding)
for (i = 0; i < 4; i++)
{
if (INTEGERP (flags[i])
&& (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
&& (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
|| (charset = get_charset_id (flags[i])) >= 0)
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
@ -2119,7 +2168,8 @@ setup_coding_system (coding_system_symbol, coding)
if (INTEGERP (XCONS (tail)->car)
&& (charset = XINT (XCONS (tail)->car),
CHARSET_VALID_P (charset)))
CHARSET_VALID_P (charset))
|| (charset = get_charset_id (XCONS (tail)->car)) >= 0)
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
@ -2131,7 +2181,8 @@ setup_coding_system (coding_system_symbol, coding)
{
if (INTEGERP (XCONS (tail)->car)
&& (charset = XINT (XCONS (tail)->car),
CHARSET_VALID_P (charset)))
CHARSET_VALID_P (charset))
|| (charset = get_charset_id (XCONS (tail)->car)) >= 0)
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= i;
else if (EQ (XCONS (tail)->car, Qt))
@ -2190,7 +2241,7 @@ setup_coding_system (coding_system_symbol, coding)
case 3:
coding->type = coding_type_big5;
coding->flags
= (NILP (XVECTOR (coding_system_vector)->contents[4])
= (NILP (XVECTOR (coding_system)->contents[4])
? CODING_FLAG_BIG5_HKU
: CODING_FLAG_BIG5_ETEN);
break;
@ -2198,7 +2249,7 @@ setup_coding_system (coding_system_symbol, coding)
case 4:
coding->type = coding_type_ccl;
{
Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
Lisp_Object val = XVECTOR (coding_system)->contents[4];
if (CONSP (val)
&& VECTORP (XCONS (val)->car)
&& VECTORP (XCONS (val)->cdr))
@ -2223,6 +2274,8 @@ setup_coding_system (coding_system_symbol, coding)
label_invalid_coding_system:
coding->type = coding_type_no_conversion;
coding->symbol = coding->pre_write_conversion = coding->post_read_conversion
= Qnil;
return -1;
}
@ -2236,52 +2289,52 @@ setup_coding_system (coding_system_symbol, coding)
The category for a coding system which has the same code range
as Emacs' internal format. Assigned the coding-system (Lisp
symbol) `coding-system-internal' by default.
symbol) `internal' by default.
o coding-category-sjis
The category for a coding system which has the same code range
as SJIS. Assigned the coding-system (Lisp
symbol) `coding-system-sjis' by default.
symbol) `shift-jis' by default.
o coding-category-iso-7
The category for a coding system which has the same code range
as ISO2022 of 7-bit environment. Assigned the coding-system
(Lisp symbol) `coding-system-junet' by default.
(Lisp symbol) `iso-2022-7' by default.
o coding-category-iso-8-1
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
for DIMENSION1 charset. Assigned the coding-system (Lisp
symbol) `coding-system-ctext' by default.
symbol) `iso-8859-1' by default.
o coding-category-iso-8-2
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
for DIMENSION2 charset. Assigned the coding-system (Lisp
symbol) `coding-system-euc-japan' by default.
symbol) `euc-japan' by default.
o coding-category-iso-else
The category for a coding system which has the same code range
as ISO2022 but not belongs to any of the above three
categories. Assigned the coding-system (Lisp symbol)
`coding-system-iso-2022-ss2-7' by default.
`iso-2022-ss2-7' by default.
o coding-category-big5
The category for a coding system which has the same code range
as BIG5. Assigned the coding-system (Lisp symbol)
`coding-system-big5' by default.
`cn-big5' by default.
o coding-category-binary
The category for a coding system not categorized in any of the
above. Assigned the coding-system (Lisp symbol)
`coding-system-noconv' by default.
`no-conversion' by default.
Each of them is a Lisp symbol and the value is an actual
`coding-system's (this is also a Lisp symbol) assigned by a user.
@ -2549,7 +2602,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
{
unsigned char *p = destination, *pend = destination + produced;
while (p < pend)
if (*p++ = '\015') p[-1] = '\n';
if (*p++ == '\015') p[-1] = '\n';
}
}
*consumed = produced;
@ -2687,23 +2740,26 @@ See document of make-coding-system for coding-system object.")
DEFUN ("read-non-nil-coding-system",
Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
"Read a coding-system from the minibuffer, prompting with string PROMPT.")
"Read a coding system from the minibuffer, prompting with string PROMPT.")
(prompt)
Lisp_Object prompt;
{
return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
Qt, Qnil, Qnil),
Qnil);
Lisp_Object val;
do {
val = Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
Qt, Qnil, Qnil);
} while (XSTRING (val)->size == 0);
return (Fintern (val, Qnil));
}
DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
"Read a coding-system or nil from the minibuffer, prompting with string PROMPT.")
"Read a coding system or nil from the minibuffer, prompting with string PROMPT.")
(prompt)
Lisp_Object prompt;
{
return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
Qt, Qnil, Qnil),
Qnil);
Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
Qt, Qnil, Qnil);
return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
}
DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
@ -2726,7 +2782,7 @@ DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 2, 0,
"Detect coding-system of the text in the region between START and END.\n\
Return a list of possible coding-systems ordered by priority.\n\
If only ASCII characters are found, it returns `coding-system-automatic'\n\
If only ASCII characters are found, it returns `automatic-conversion'\n\
or its subsidiary coding-system according to a detected end-of-line format.")
(b, e)
Lisp_Object b, e;
@ -2744,7 +2800,7 @@ If only ASCII characters are found, it returns `coding-system-automatic'\n\
if (coding_mask == CODING_CATEGORY_MASK_ANY)
{
val = intern ("coding-system-automatic");
val = intern ("automatic-conversion");
if (eol_type != CODING_EOL_AUTOMATIC)
{
Lisp_Object val2 = Fget (val, Qeol_type);
@ -2823,9 +2879,24 @@ shrink_conversion_area (begp, endp, coding, encodep)
case coding_type_ccl:
/* We can't skip any data. */
return;
case coding_type_iso2022:
if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
{
unsigned char *bol = beg_addr;
while (beg_addr < end_addr && *beg_addr < 0x80)
{
beg_addr++;
if (*(beg_addr - 1) == '\n')
bol = beg_addr;
}
beg_addr = bol;
goto label_skip_tail;
}
/* fall down ... */
default:
/* We can skip all ASCII characters at the head and tail. */
while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
label_skip_tail:
while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
break;
}
@ -2974,8 +3045,8 @@ code_convert_region (b, e, coding, encodep)
}
Lisp_Object
code_convert_string (str, coding, encodep)
Lisp_Object str;
code_convert_string (str, coding, encodep, nocopy)
Lisp_Object str, nocopy;
struct coding_system *coding;
int encodep;
{
@ -3014,7 +3085,7 @@ code_convert_string (str, coding, encodep)
if (begp == endp)
/* We need no conversion. */
return str;
return (NILP (nocopy) ? Fcopy_sequence (str) : str);
head_skip = begp - XSTRING (str)->data;
tail_skip = XSTRING (str)->size - head_skip - (endp - begp);
@ -3044,8 +3115,10 @@ code_convert_string (str, coding, encodep)
}
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
3, 3, 0,
"Decode the text between START and END which is encoded in CODING-SYSTEM.\n\
3, 3, "r\nzCoding system: ",
"Decode current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM. START END are buffer positions.\n\
Return length of decoded text.")
(b, e, coding_system)
Lisp_Object b, e, coding_system;
@ -3056,6 +3129,8 @@ Return length of decoded text.")
CHECK_NUMBER_COERCE_MARKER (e, 1);
CHECK_SYMBOL (coding_system, 2);
if (NILP (coding_system))
return make_number (XFASTINT (e) - XFASTINT (b));
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
@ -3063,8 +3138,10 @@ Return length of decoded text.")
}
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
3, 3, 0,
"Encode the text between START and END to CODING-SYSTEM.\n\
3, 3, "r\nzCoding system: ",
"Encode current region by specified coding system.\n\
When called from a program, takes three arguments:\n\
START, END, and CODING-SYSTEM. START END are buffer positions.\n\
Return length of encoded text.")
(b, e, coding_system)
Lisp_Object b, e, coding_system;
@ -3075,6 +3152,8 @@ Return length of encoded text.")
CHECK_NUMBER_COERCE_MARKER (e, 1);
CHECK_SYMBOL (coding_system, 2);
if (NILP (coding_system))
return make_number (XFASTINT (e) - XFASTINT (b));
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
@ -3082,41 +3161,49 @@ Return length of encoded text.")
}
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
2, 2, 0,
"Decode STRING which is encoded in CODING-SYSTEM, and return the result.")
(string, coding_system)
Lisp_Object string, coding_system;
2, 3, 0,
"Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
of decoding.")
(string, coding_system, nocopy)
Lisp_Object string, coding_system, nocopy;
{
struct coding_system coding;
CHECK_STRING (string, 0);
CHECK_SYMBOL (coding_system, 1);
if (NILP (coding_system))
return (NILP (nocopy) ? Fcopy_sequence (string) : string);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
return code_convert_string (string, &coding, 0);
return code_convert_string (string, &coding, 0, nocopy);
}
DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
2, 2, 0,
"Encode STRING to CODING-SYSTEM, and return the result.")
(string, coding_system)
Lisp_Object string, coding_system;
2, 3, 0,
"Encode STRING to CODING-SYSTEM, and return the result.\n\
Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
of encoding.")
(string, coding_system, nocopy)
Lisp_Object string, coding_system, nocopy;
{
struct coding_system coding;
CHECK_STRING (string, 0);
CHECK_SYMBOL (coding_system, 1);
if (NILP (coding_system))
return (NILP (nocopy) ? Fcopy_sequence (string) : string);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
return code_convert_string (string, &coding, 1);
return code_convert_string (string, &coding, 1, nocopy);
}
DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
"Decode a JISX0208 character of SJIS coding-system-sjis.\n\
"Decode a JISX0208 character of shift-jis encoding.\n\
CODE is the character code in SJIS.\n\
Return the corresponding character.")
(code)
@ -3255,7 +3342,7 @@ For each OPERATION, TARGET is selected from the arguments as below:\n\
\n\
The return value is a cons of coding systems for decoding and encoding\n\
registered in nested alist `coding-system-alist' (which see) at a slot\n\
corresponding to OPERATION and TARGET.
corresponding to OPERATION and TARGET.\n\
If a function symbol is at the slot, return a result of the function call.\n\
The function is called with one argument, a list of all the arguments.")
(nargs, args)
@ -3346,6 +3433,39 @@ init_coding_once ()
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
setup_coding_system (Qnil, &keyboard_coding);
setup_coding_system (Qnil, &terminal_coding);
}
#ifdef emacs
syms_of_coding ()
{
Qtarget_idx = intern ("target-idx");
staticpro (&Qtarget_idx);
Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
Fput (Qwrite_region, Qtarget_idx, make_number (2));
Qcall_process = intern ("call-process");
staticpro (&Qcall_process);
Fput (Qcall_process, Qtarget_idx, make_number (0));
Qcall_process_region = intern ("call-process-region");
staticpro (&Qcall_process_region);
Fput (Qcall_process_region, Qtarget_idx, make_number (2));
Qstart_process = intern ("start-process");
staticpro (&Qstart_process);
Fput (Qstart_process, Qtarget_idx, make_number (2));
Qopen_network_stream = intern ("open-network-stream");
staticpro (&Qopen_network_stream);
Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
Qcoding_system = intern ("coding-system");
staticpro (&Qcoding_system);
@ -3389,39 +3509,6 @@ init_coding_once ()
}
}
conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
setup_coding_system (Qnil, &keyboard_coding);
setup_coding_system (Qnil, &terminal_coding);
}
#ifdef emacs
syms_of_coding ()
{
Qtarget_idx = intern ("target-idx");
staticpro (&Qtarget_idx);
Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
Fput (Qwrite_region, Qtarget_idx, make_number (2));
Qcall_process = intern ("call-process");
staticpro (&Qcall_process);
Fput (Qcall_process, Qtarget_idx, make_number (0));
Qcall_process_region = intern ("call-process-region");
staticpro (&Qcall_process_region);
Fput (Qcall_process_region, Qtarget_idx, make_number (2));
Qstart_process = intern ("start-process");
staticpro (&Qstart_process);
Fput (Qstart_process, Qtarget_idx, make_number (2));
Qopen_network_stream = intern ("open-network-stream");
staticpro (&Qopen_network_stream);
Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
defsubr (&Scoding_system_vector);
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
@ -3472,7 +3559,7 @@ If not, an appropriate element in `coding-system-alist' (which see) is used.");
DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
"Nested alist to decide a coding system for a specific I/O operation.\n\
The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
\n\
OPERATION is one of the following Emacs I/O primitives:\n\
For file I/O, insert-file-contents and write-region.\n\
For process I/O, call-process, call-process-region, and start-process.\n\