(Qno_conversion, Qundecided): New variables.

(syms_of_coding): Initialize and staticpro them.
(coding_category_name): Include "coding-category-raw-test".
(setup_coding_system): Handle coding_type_raw_text.
(detect_coding_mask): Include CODING_CATEGORY_MASK_RAW_TEXT in the
return value instead of CODING_CATEGORY_MASK_BINARY.
(detect_coding): Do not check the case that `mask' is 0, which
never happens now.
(detect_eol_type): If EOL format is inconsistent, return
CODING_EOL_INCONSISTENT.
(detect_eol): If EOL format of raw-text file is inconsistent,
detect it as no-conversion.
(decode_coding): Handle coding_type_raw_text.
(encode_coding): Likewise.
(Fdetect_coding_region): Ajusted for the above changes.
(shrink_conversion_area): Handle coding_type_raw_text.
This commit is contained in:
Kenichi Handa 1997-08-28 10:51:12 +00:00
parent e80de6b18f
commit 27901516b6

View file

@ -67,7 +67,13 @@ Boston, MA 02111-1307, USA. */
(all uppercase), we mean the coding system, and when we write
"Big5" (capitalized), we mean the character set.
4. Other
4. Raw text
A coding system to for a text containing random 8-bit code. Emacs
does no code conversion on such a text except for end-of-line
format.
5. Other
If a user wants to read/write a text encoded in a coding system not
listed above, he can supply a decoder and an encoder for it in CCL
@ -246,6 +252,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
Lisp_Object Qcoding_system, Qeol_type;
Lisp_Object Qbuffer_file_coding_system;
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
Lisp_Object Qno_conversion, Qundecided;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
@ -319,6 +326,7 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
"coding-category-iso-7-else",
"coding-category-iso-8-else",
"coding-category-big5",
"coding-category-raw-text",
"coding-category-binary"
};
@ -2546,6 +2554,10 @@ setup_coding_system (coding_system, coding)
coding->require_flushing = 1;
break;
case 5:
coding->type = coding_type_raw_text;
break;
default:
if (EQ (type, Qt))
coding->type = coding_type_undecided;
@ -2687,7 +2699,7 @@ detect_coding_mask (src, src_bytes)
/* If C is a special latin extra code,
or is an ISO2022 specific control code of C1 (SS2 or SS3),
or is an ISO2022 control-sequence-introducer (CSI),
we should also consider the possibility of someof ISO2022 codings. */
we should also consider the possibility of ISO2022 codings. */
if ((VECTORP (Vlatin_extra_code_table)
&& !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
|| (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
@ -2700,14 +2712,14 @@ detect_coding_mask (src, src_bytes)
mask = (detect_coding_iso2022 (src, src_end)
| detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
| CODING_CATEGORY_MASK_RAW_TEXT);
else
/* C is the first byte of SJIS character code, or a
leading-code of Emacs. */
/* C is the first byte of SJIS character code,
or a leading-code of Emacs' internal format (emacs-mule). */
mask = (detect_coding_sjis (src, src_end)
| detect_coding_emacs_mule (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
| CODING_CATEGORY_MASK_RAW_TEXT);
}
else
/* C is a character of ISO2022 in graphic plane right,
@ -2716,7 +2728,7 @@ detect_coding_mask (src, src_bytes)
mask = (detect_coding_iso2022 (src, src_end)
| detect_coding_sjis (src, src_end)
| detect_coding_big5 (src, src_end)
| CODING_CATEGORY_MASK_BINARY);
| CODING_CATEGORY_MASK_RAW_TEXT);
return mask;
}
@ -2732,42 +2744,33 @@ detect_coding (coding, src, src_bytes)
{
int mask = detect_coding_mask (src, src_bytes);
int idx;
Lisp_Object val = Vcoding_category_list;
if (mask == CODING_CATEGORY_MASK_ANY)
/* We found nothing other than ASCII. There's nothing to do. */
return;
if (!mask)
/* The source text seems to be encoded in unknown coding system.
Emacs regards the category of such a kind of coding system as
`coding-category-binary'. We assume that a user has assigned
an appropriate coding system for a `coding-category-binary'. */
idx = CODING_CATEGORY_IDX_BINARY;
/* We found some plausible coding systems. Let's use a coding
system of the highest priority. */
if (CONSP (val))
while (!NILP (val))
{
idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
break;
val = XCONS (val)->cdr;
}
else
val = Qnil;
if (NILP (val))
{
/* We found some plausible coding systems. Let's use a coding
system of the highest priority. */
Lisp_Object val = Vcoding_category_list;
if (CONSP (val))
while (!NILP (val))
{
idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
break;
val = XCONS (val)->cdr;
}
else
val = Qnil;
if (NILP (val))
{
/* For unknown reason, `Vcoding_category_list' contains none
of found categories. Let's use any of them. */
for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
if (mask & (1 << idx))
break;
}
/* For unknown reason, `Vcoding_category_list' contains none of
found categories. Let's use any of them. */
for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
if (mask & (1 << idx))
break;
}
setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding);
}
@ -2807,8 +2810,8 @@ detect_eol_type (src, src_bytes)
eol_type = this_eol_type;
else if (eol_type != this_eol_type)
/* The found type is different from what found before.
We had better not decode end-of-line. */
return CODING_EOL_LF;
Let's notice the caller about this inconsistency. */
return CODING_EOL_INCONSISTENT;
}
}
@ -2832,6 +2835,24 @@ detect_eol (coding, src, src_bytes)
/* We found no end-of-line in the source text. */
return;
if (eol_type == CODING_EOL_INCONSISTENT)
{
#if 0
/* This code is suppressed until we find a better way to
distinguish raw-text and binary. */
/* If we have already detected that the coding is raw-text, the
coding should actually be no-conversion. */
if (coding->type == coding_type_raw_text)
{
setup_coding_system (Qno_conversion, coding);
return;
}
/* Else, let's decode only text code anyway. */
#endif /* 0 */
eol_type == CODING_EOL_LF;
}
coding_system = coding->symbol;
while (!NILP (coding_system)
&& NILP (val = Fget (coding_system, Qeol_type)))
@ -2877,6 +2898,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
case coding_type_emacs_mule:
case coding_type_undecided:
case coding_type_raw_text:
if (coding->eol_type == CODING_EOL_LF
|| coding->eol_type == CODING_EOL_UNDECIDED)
goto label_no_conversion;
@ -2941,6 +2963,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
case coding_type_emacs_mule:
case coding_type_undecided:
case coding_type_raw_text:
if (coding->eol_type == CODING_EOL_LF
|| coding->eol_type == CODING_EOL_UNDECIDED)
goto label_no_conversion;
@ -3133,10 +3156,11 @@ If only ASCII characters are found, it returns `undecided'\n\
if (coding_mask == CODING_CATEGORY_MASK_ANY)
{
val = intern ("undecided");
if (eol_type != CODING_EOL_UNDECIDED)
val = Qundecided;
if (eol_type != CODING_EOL_UNDECIDED
&& eol_type != CODING_EOL_INCONSISTENT)
{
Lisp_Object val2 = Fget (val, Qeol_type);
Lisp_Object val2 = Fget (Qundecided, Qeol_type);
if (VECTORP (val2))
val = XVECTOR (val2)->contents[eol_type];
}
@ -3155,13 +3179,26 @@ If only ASCII characters are found, it returns `undecided'\n\
int idx
= XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index));
if (coding_mask & (1 << idx))
val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
{
#if 0
/* This code is suppressed until we find a better way to
distinguish raw-text and binary. */
if (idx == CODING_CATEGORY_IDX_RAW_TEXT
&& eol_type == CODING_EOL_INCONSISTENT)
val = Fcons (Qno_conversion, val);
else
#endif /* 0 */
val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
}
}
/* Then, change the order of the list, while getting subsidiary
coding-systems. */
val2 = val;
val = Qnil;
if (eol_type == CODING_EOL_INCONSISTENT)
eol_type == CODING_EOL_UNDECIDED;
for (; !NILP (val2); val2 = XCONS (val2)->cdr)
{
if (eol_type == CODING_EOL_UNDECIDED)
@ -3206,6 +3243,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
case coding_type_no_conversion:
case coding_type_emacs_mule:
case coding_type_undecided:
case coding_type_raw_text:
/* We need no conversion. */
*begp = *endp;
return;
@ -3243,6 +3281,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
*begp = *endp;
return;
case coding_type_emacs_mule:
case coding_type_raw_text:
if (coding->eol_type == CODING_EOL_LF)
{
/* We need no conversion. */
@ -3857,6 +3896,12 @@ syms_of_coding ()
Qpre_write_conversion = intern ("pre-write-conversion");
staticpro (&Qpre_write_conversion);
Qno_conversion = intern ("no-conversion");
staticpro (&Qno_conversion);
Qundecided = intern ("undecided");
staticpro (&Qundecided);
Qcoding_system_spec = intern ("coding-system-spec");
staticpro (&Qcoding_system_spec);