Mercurial > emacs
comparison src/coding.c @ 89225:32058afc72e2
(detect_coding_charset): If only ASCII bytes are found, return 0.
(detect_coding_system): Fix previous change.
(Fdefine_coding_system_internal): Set up CODING_ATTR_ASCII_COMPAT
(attrs) correctly.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Wed, 16 Oct 2002 05:03:55 +0000 |
| parents | e6779a6185ca |
| children | 101ee928c088 |
comparison
equal
deleted
inserted
replaced
| 89224:d77928dacd87 | 89225:32058afc72e2 |
|---|---|
| 4368 unsigned char *src = coding->source, *src_base = src; | 4368 unsigned char *src = coding->source, *src_base = src; |
| 4369 unsigned char *src_end = coding->source + coding->src_bytes; | 4369 unsigned char *src_end = coding->source + coding->src_bytes; |
| 4370 int multibytep = coding->src_multibyte; | 4370 int multibytep = coding->src_multibyte; |
| 4371 int consumed_chars = 0; | 4371 int consumed_chars = 0; |
| 4372 Lisp_Object attrs, valids; | 4372 Lisp_Object attrs, valids; |
| | 4373 int found = 0; |
| 4373 | 4374 |
| 4374 coding = &coding_categories[coding_category_charset]; | 4375 coding = &coding_categories[coding_category_charset]; |
| 4375 attrs = CODING_ID_ATTRS (coding->id); | 4376 attrs = CODING_ID_ATTRS (coding->id); |
| 4376 valids = AREF (attrs, coding_attr_charset_valids); | 4377 valids = AREF (attrs, coding_attr_charset_valids); |
| 4377 | 4378 |
| 4383 int c; | 4384 int c; |
| 4384 | 4385 |
| 4385 ONE_MORE_BYTE (c); | 4386 ONE_MORE_BYTE (c); |
| 4386 if (NILP (AREF (valids, c))) | 4387 if (NILP (AREF (valids, c))) |
| 4387 break; | 4388 break; |
| | 4389 if (c >= 0x80) |
| | 4390 found = 1; |
| 4388 } | 4391 } |
| 4389 *mask &= ~CATEGORY_MASK_CHARSET; | 4392 *mask &= ~CATEGORY_MASK_CHARSET; |
| 4390 return 0; | 4393 return 0; |
| 4391 | 4394 |
| 4392 no_more_source: | 4395 no_more_source: |
| 4393 return 1; | 4396 return (found || NILP (CODING_ATTR_ASCII_COMPAT (attrs))); |
| 4394 } | 4397 } |
| 4395 | 4398 |
| 4396 static void | 4399 static void |
| 4397 decode_coding_charset (coding) | 4400 decode_coding_charset (coding) |
| 4398 struct coding_system *coding; | 4401 struct coding_system *coding; |
| 6321 { | 6324 { |
| 6322 c = *src; | 6325 c = *src; |
| 6323 if (c & 0x80 | 6326 if (c & 0x80 |
| 6324 || (c < 0x20 && (c == ISO_CODE_ESC | 6327 || (c < 0x20 && (c == ISO_CODE_ESC |
| 6325 || c == ISO_CODE_SI | 6328 || c == ISO_CODE_SI |
| 6326 || c == ISO_CODE_SO | 6329 || c == ISO_CODE_SO))) |
| 6327 /* Most UTF-16 text contains '\0'. */ | |
| 6328 || !c))) | |
| 6329 break; | 6330 break; |
| 6330 } | 6331 } |
| 6331 coding.head_ascii = src - coding.source; | 6332 coding.head_ascii = src - coding.source; |
| 6332 | 6333 |
| 6333 if (src < src_end) | 6334 if (src < src_end) |
| 7469 make_number (255)); | 7470 make_number (255)); |
| 7470 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) | 7471 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) |
| 7471 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0; | 7472 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0; |
| 7472 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; | 7473 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; |
| 7473 | 7474 |
| | 7475 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p]; |
| | 7476 |
| 7474 val = args[coding_arg_decode_translation_table]; | 7477 val = args[coding_arg_decode_translation_table]; |
| 7475 if (! NILP (val)) | 7478 if (! NILP (val)) |
| 7476 CHECK_CHAR_TABLE (val); | 7479 CHECK_CHAR_TABLE (val); |
| 7477 CODING_ATTR_DECODE_TBL (attrs) = val; | 7480 CODING_ATTR_DECODE_TBL (attrs) = val; |
| 7478 | 7481 |
| 7523 { | 7526 { |
| 7524 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail))); | 7527 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail))); |
| 7525 int dim = CHARSET_DIMENSION (charset); | 7528 int dim = CHARSET_DIMENSION (charset); |
| 7526 int idx = (dim - 1) * 4; | 7529 int idx = (dim - 1) * 4; |
| 7527 | 7530 |
| | 7531 if (CHARSET_ASCII_COMPATIBLE_P (charset)) |
| | 7532 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; |
| | 7533 |
| 7528 for (i = charset->code_space[idx]; | 7534 for (i = charset->code_space[idx]; |
| 7529 i <= charset->code_space[idx + 1]; i++) | 7535 i <= charset->code_space[idx + 1]; i++) |
| 7530 { | 7536 { |
| 7531 Lisp_Object tmp, tmp2; | 7537 Lisp_Object tmp, tmp2; |
| 7532 int dim2; | 7538 int dim2; |
| 7609 } | 7615 } |
| 7610 else if (EQ (coding_type, Qutf_16)) | 7616 else if (EQ (coding_type, Qutf_16)) |
| 7611 { | 7617 { |
| 7612 Lisp_Object bom, endian; | 7618 Lisp_Object bom, endian; |
| 7613 | 7619 |
| | 7620 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil; |
| | 7621 |
| 7614 if (nargs < coding_arg_utf16_max) | 7622 if (nargs < coding_arg_utf16_max) |
| 7615 goto short_args; | 7623 goto short_args; |
| 7616 | 7624 |
| 7617 bom = args[coding_arg_utf16_bom]; | 7625 bom = args[coding_arg_utf16_bom]; |
| 7618 if (! NILP (bom) && ! EQ (bom, Qt)) | 7626 if (! NILP (bom) && ! EQ (bom, Qt)) |
| 7649 for (i = 0; i < 4; i++) | 7657 for (i = 0; i < 4; i++) |
| 7650 { | 7658 { |
| 7651 val = Faref (initial, make_number (i)); | 7659 val = Faref (initial, make_number (i)); |
| 7652 if (! NILP (val)) | 7660 if (! NILP (val)) |
| 7653 { | 7661 { |
| 7654 CHECK_CHARSET_GET_ID (val, id); | 7662 struct charset *charset; |
| 7655 ASET (initial, i, make_number (id)); | 7663 |
| | 7664 CHECK_CHARSET_GET_CHARSET (val, charset); |
| | 7665 ASET (initial, i, make_number (CHARSET_ID (charset))); |
| | 7666 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset)) |
| | 7667 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; |
| 7656 } | 7668 } |
| 7657 else | 7669 else |
| 7658 ASET (initial, i, make_number (-1)); | 7670 ASET (initial, i, make_number (-1)); |
| 7659 } | 7671 } |
| 7660 | 7672 |
| 7711 } | 7723 } |
| 7712 else if (EQ (coding_type, Qemacs_mule)) | 7724 else if (EQ (coding_type, Qemacs_mule)) |
| 7713 { | 7725 { |
| 7714 if (EQ (args[coding_arg_charset_list], Qemacs_mule)) | 7726 if (EQ (args[coding_arg_charset_list], Qemacs_mule)) |
| 7715 ASET (attrs, coding_attr_emacs_mule_full, Qt); | 7727 ASET (attrs, coding_attr_emacs_mule_full, Qt); |
| 7716 | 7728 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; |
| 7717 category = coding_category_emacs_mule; | 7729 category = coding_category_emacs_mule; |
| 7718 } | 7730 } |
| 7719 else if (EQ (coding_type, Qshift_jis)) | 7731 else if (EQ (coding_type, Qshift_jis)) |
| 7720 { | 7732 { |
| 7721 | 7733 |
| 7726 | 7738 |
| 7727 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 7739 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 7728 if (CHARSET_DIMENSION (charset) != 1) | 7740 if (CHARSET_DIMENSION (charset) != 1) |
| 7729 error ("Dimension of charset %s is not one", | 7741 error ("Dimension of charset %s is not one", |
| 7730 XSYMBOL (CHARSET_NAME (charset))->name->data); | 7742 XSYMBOL (CHARSET_NAME (charset))->name->data); |
| | 7743 if (CHARSET_ASCII_COMPATIBLE_P (charset)) |
| | 7744 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; |
| 7731 | 7745 |
| 7732 charset_list = XCDR (charset_list); | 7746 charset_list = XCDR (charset_list); |
| 7733 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 7747 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 7734 if (CHARSET_DIMENSION (charset) != 1) | 7748 if (CHARSET_DIMENSION (charset) != 1) |
| 7735 error ("Dimension of charset %s is not one", | 7749 error ("Dimension of charset %s is not one", |
| 7753 | 7767 |
| 7754 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 7768 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 7755 if (CHARSET_DIMENSION (charset) != 1) | 7769 if (CHARSET_DIMENSION (charset) != 1) |
| 7756 error ("Dimension of charset %s is not one", | 7770 error ("Dimension of charset %s is not one", |
| 7757 XSYMBOL (CHARSET_NAME (charset))->name->data); | 7771 XSYMBOL (CHARSET_NAME (charset))->name->data); |
| | 7772 if (CHARSET_ASCII_COMPATIBLE_P (charset)) |
| | 7773 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; |
| 7758 | 7774 |
| 7759 charset_list = XCDR (charset_list); | 7775 charset_list = XCDR (charset_list); |
| 7760 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 7776 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 7761 if (CHARSET_DIMENSION (charset) != 2) | 7777 if (CHARSET_DIMENSION (charset) != 2) |
| 7762 error ("Dimension of charset %s is not two", | 7778 error ("Dimension of charset %s is not two", |
| 7764 | 7780 |
| 7765 category = coding_category_big5; | 7781 category = coding_category_big5; |
| 7766 Vbig5_coding_system = name; | 7782 Vbig5_coding_system = name; |
| 7767 } | 7783 } |
| 7768 else if (EQ (coding_type, Qraw_text)) | 7784 else if (EQ (coding_type, Qraw_text)) |
| 7769 category = coding_category_raw_text; | 7785 { |
| | 7786 category = coding_category_raw_text; |
| | 7787 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; |
| | 7788 } |
| 7770 else if (EQ (coding_type, Qutf_8)) | 7789 else if (EQ (coding_type, Qutf_8)) |
| 7771 category = coding_category_utf_8; | 7790 { |
| | 7791 category = coding_category_utf_8; |
| | 7792 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; |
| | 7793 } |
| 7772 else if (EQ (coding_type, Qundecided)) | 7794 else if (EQ (coding_type, Qundecided)) |
| 7773 category = coding_category_undecided; | 7795 category = coding_category_undecided; |
| 7774 else | 7796 else |
| 7775 error ("Invalid coding system type: %s", | 7797 error ("Invalid coding system type: %s", |
| 7776 XSYMBOL (coding_type)->name->data); | 7798 XSYMBOL (coding_type)->name->data); |
