Mercurial > emacs
comparison src/coding.c @ 22119:592bb8b9bcfd
Change terms unify/unification to
translate/translation respectively throughout the file.
(encode_coding_iso2022): Fix bug in encoding a text ending with a
composite character.
(check_composing_code): If we are decoding the last block of data,
return 0 even if the source doesn't end with an escape sequence
which terminates the current composing sequence.
(decode_coding_iso2022): Decode correctly even if the source
doesn't end with an escape sequence which terminates the current
composing sequence.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 18 May 1998 00:59:38 +0000 |
| parents | e144ad749084 |
| children | 917e237b5e65 |
comparison
equal
deleted
inserted
replaced
| 22118:42e2ffa98618 | 22119:592bb8b9bcfd |
|---|---|
| 365 | 365 |
| 366 /* Table pointers to coding systems corresponding to each coding | 366 /* Table pointers to coding systems corresponding to each coding |
| 367 categories. */ | 367 categories. */ |
| 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; | 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; |
| 369 | 369 |
| 370 /* Flag to tell if we look up unification table on character code | 370 /* Flag to tell if we look up character translation table on character |
| 371 conversion. */ | 371 code conversion. */ |
| 372 Lisp_Object Venable_character_unification; | 372 Lisp_Object Venable_character_translation; |
| 373 /* Standard unification table to look up on decoding (reading). */ | 373 /* Standard character translation table to look up on decoding (reading). */ |
| 374 Lisp_Object Vstandard_character_unification_table_for_decode; | 374 Lisp_Object Vstandard_character_translation_table_for_decode; |
| 375 /* Standard unification table to look up on encoding (writing). */ | 375 /* Standard character translation table to look up on encoding (writing). */ |
| 376 Lisp_Object Vstandard_character_unification_table_for_encode; | 376 Lisp_Object Vstandard_character_translation_table_for_encode; |
| 377 | 377 |
| 378 Lisp_Object Qcharacter_unification_table; | 378 Lisp_Object Qcharacter_translation_table; |
| 379 Lisp_Object Qcharacter_unification_table_for_decode; | 379 Lisp_Object Qcharacter_translation_table_id; |
| 380 Lisp_Object Qcharacter_unification_table_for_encode; | 380 Lisp_Object Qcharacter_translation_table_for_decode; |
| 381 Lisp_Object Qcharacter_translation_table_for_encode; | |
| 381 | 382 |
| 382 /* Alist of charsets vs revision number. */ | 383 /* Alist of charsets vs revision number. */ |
| 383 Lisp_Object Vcharset_revision_alist; | 384 Lisp_Object Vcharset_revision_alist; |
| 384 | 385 |
| 385 /* Default coding systems used for process I/O. */ | 386 /* Default coding systems used for process I/O. */ |
| 888 { \ | 889 { \ |
| 889 src--; \ | 890 src--; \ |
| 890 c2 = ' '; \ | 891 c2 = ' '; \ |
| 891 } \ | 892 } \ |
| 892 } \ | 893 } \ |
| 893 if (!NILP (unification_table) \ | 894 if (!NILP (translation_table) \ |
| 894 && ((c_alt = unify_char (unification_table, \ | 895 && ((c_alt = translate_char (translation_table, \ |
| 895 -1, (charset), c1, c2)) >= 0)) \ | 896 -1, (charset), c1, c2)) >= 0)) \ |
| 896 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | 897 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ |
| 897 } \ | 898 } \ |
| 898 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ | 899 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ |
| 899 DECODE_CHARACTER_ASCII (c1); \ | 900 DECODE_CHARACTER_ASCII (c1); \ |
| 900 else if (CHARSET_DIMENSION (charset_alt) == 1) \ | 901 else if (CHARSET_DIMENSION (charset_alt) == 1) \ |
| 941 /* Check if the current composing sequence contains only valid codes. | 942 /* Check if the current composing sequence contains only valid codes. |
| 942 If the composing sequence doesn't end before SRC_END, return -1. | 943 If the composing sequence doesn't end before SRC_END, return -1. |
| 943 Else, if it contains only valid codes, return 0. | 944 Else, if it contains only valid codes, return 0. |
| 944 Else return the length of the composing sequence. */ | 945 Else return the length of the composing sequence. */ |
| 945 | 946 |
| 946 int check_composing_code (coding, src, src_end) | 947 int |
| 948 check_composing_code (coding, src, src_end) | |
| 947 struct coding_system *coding; | 949 struct coding_system *coding; |
| 948 unsigned char *src, *src_end; | 950 unsigned char *src, *src_end; |
| 949 { | 951 { |
| 950 unsigned char *src_start = src; | 952 unsigned char *src_start = src; |
| 951 int invalid_code_found = 0; | 953 int invalid_code_found = 0; |
| 980 } | 982 } |
| 981 else | 983 else |
| 982 invalid_code_found = 1; | 984 invalid_code_found = 1; |
| 983 } | 985 } |
| 984 } | 986 } |
| 985 return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1); | 987 return (invalid_code_found |
| 988 ? src - src_start | |
| 989 : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1)); | |
| 986 } | 990 } |
| 987 | 991 |
| 988 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 992 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 989 | 993 |
| 990 int | 994 int |
| 1003 unsigned char *adjusted_dst_end = dst_end - 6; | 1007 unsigned char *adjusted_dst_end = dst_end - 6; |
| 1004 int charset; | 1008 int charset; |
| 1005 /* Charsets invoked to graphic plane 0 and 1 respectively. */ | 1009 /* Charsets invoked to graphic plane 0 and 1 respectively. */ |
| 1006 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1010 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1007 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); | 1011 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); |
| 1008 Lisp_Object unification_table | 1012 Lisp_Object translation_table |
| 1009 = coding->character_unification_table_for_decode; | 1013 = coding->character_translation_table_for_decode; |
| 1010 int result = CODING_FINISH_NORMAL; | 1014 int result = CODING_FINISH_NORMAL; |
| 1011 | 1015 |
| 1012 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 1016 if (!NILP (Venable_character_translation) && NILP (translation_table)) |
| 1013 unification_table = Vstandard_character_unification_table_for_decode; | 1017 translation_table = Vstandard_character_translation_table_for_decode; |
| 1014 | 1018 |
| 1015 coding->produced_char = 0; | 1019 coding->produced_char = 0; |
| 1016 coding->fake_multibyte = 0; | 1020 coding->fake_multibyte = 0; |
| 1017 while (src < src_end && (dst_bytes | 1021 while (src < src_end && (dst_bytes |
| 1018 ? (dst < adjusted_dst_end) | 1022 ? (dst < adjusted_dst_end) |
| 1220 { | 1224 { |
| 1221 int result1; | 1225 int result1; |
| 1222 | 1226 |
| 1223 result1 = check_composing_code (coding, src, src_end); | 1227 result1 = check_composing_code (coding, src, src_end); |
| 1224 if (result1 == 0) | 1228 if (result1 == 0) |
| 1225 coding->composing = (c1 == '0' | 1229 { |
| 1226 ? COMPOSING_NO_RULE_HEAD | 1230 coding->composing = (c1 == '0' |
| 1227 : COMPOSING_WITH_RULE_HEAD); | 1231 ? COMPOSING_NO_RULE_HEAD |
| 1232 : COMPOSING_WITH_RULE_HEAD); | |
| 1233 coding->produced_char++; | |
| 1234 } | |
| 1228 else if (result1 > 0) | 1235 else if (result1 > 0) |
| 1229 { | 1236 { |
| 1230 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) | 1237 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) |
| 1231 { | 1238 { |
| 1232 bcopy (src_base, dst, result1 + 2); | 1239 bcopy (src_base, dst, result1 + 2); |
| 1245 } | 1252 } |
| 1246 break; | 1253 break; |
| 1247 | 1254 |
| 1248 case '1': /* end composing */ | 1255 case '1': /* end composing */ |
| 1249 coding->composing = COMPOSING_NO; | 1256 coding->composing = COMPOSING_NO; |
| 1250 coding->produced_char++; | |
| 1251 break; | 1257 break; |
| 1252 | 1258 |
| 1253 case '[': /* specification of direction */ | 1259 case '[': /* specification of direction */ |
| 1254 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) | 1260 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) |
| 1255 goto label_invalid_code; | 1261 goto label_invalid_code; |
| 1550 register. Then repeat the loop to actually produce the \ | 1556 register. Then repeat the loop to actually produce the \ |
| 1551 character. */ \ | 1557 character. */ \ |
| 1552 dst = encode_invocation_designation (charset, coding, dst); \ | 1558 dst = encode_invocation_designation (charset, coding, dst); \ |
| 1553 } while (1) | 1559 } while (1) |
| 1554 | 1560 |
| 1555 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ | 1561 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ |
| 1556 do { \ | 1562 do { \ |
| 1557 int c_alt, charset_alt; \ | 1563 int c_alt, charset_alt; \ |
| 1558 if (!NILP (unification_table) \ | 1564 if (!NILP (translation_table) \ |
| 1559 && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \ | 1565 && ((c_alt = translate_char (translation_table, -1, \ |
| 1560 >= 0)) \ | 1566 charset, c1, c2)) \ |
| 1561 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | 1567 >= 0)) \ |
| 1562 else \ | 1568 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ |
| 1563 charset_alt = charset; \ | 1569 else \ |
| 1564 if (CHARSET_DIMENSION (charset_alt) == 1) \ | 1570 charset_alt = charset; \ |
| 1565 { \ | 1571 if (CHARSET_DIMENSION (charset_alt) == 1) \ |
| 1566 if (charset == CHARSET_ASCII \ | 1572 { \ |
| 1567 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ | 1573 if (charset == CHARSET_ASCII \ |
| 1568 charset_alt = charset_latin_jisx0201; \ | 1574 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ |
| 1569 ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \ | 1575 charset_alt = charset_latin_jisx0201; \ |
| 1570 } \ | 1576 ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \ |
| 1571 else \ | 1577 } \ |
| 1572 { \ | 1578 else \ |
| 1573 if (charset == charset_jisx0208 \ | 1579 { \ |
| 1574 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ | 1580 if (charset == charset_jisx0208 \ |
| 1575 charset_alt = charset_jisx0208_1978; \ | 1581 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ |
| 1576 ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ | 1582 charset_alt = charset_jisx0208_1978; \ |
| 1577 } \ | 1583 ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ |
| 1578 if (! COMPOSING_P (coding->composing)) \ | 1584 } \ |
| 1579 coding->consumed_char++; \ | 1585 if (! COMPOSING_P (coding->composing)) \ |
| 1580 } while (0) | 1586 coding->consumed_char++; \ |
| 1587 } while (0) | |
| 1581 | 1588 |
| 1582 /* Produce designation and invocation codes at a place pointed by DST | 1589 /* Produce designation and invocation codes at a place pointed by DST |
| 1583 to use CHARSET. The element `spec.iso2022' of *CODING is updated. | 1590 to use CHARSET. The element `spec.iso2022' of *CODING is updated. |
| 1584 Return new DST. */ | 1591 Return new DST. */ |
| 1585 | 1592 |
| 1708 { | 1715 { |
| 1709 int c_alt; | 1716 int c_alt; |
| 1710 unsigned char c1, c2; | 1717 unsigned char c1, c2; |
| 1711 | 1718 |
| 1712 SPLIT_STRING(src, bytes, charset, c1, c2); | 1719 SPLIT_STRING(src, bytes, charset, c1, c2); |
| 1713 if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0) | 1720 if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0) |
| 1714 charset = CHAR_CHARSET (c_alt); | 1721 charset = CHAR_CHARSET (c_alt); |
| 1715 } | 1722 } |
| 1716 | 1723 |
| 1717 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); | 1724 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); |
| 1718 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) | 1725 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) |
| 1748 unsigned char *dst_end = destination + dst_bytes; | 1755 unsigned char *dst_end = destination + dst_bytes; |
| 1749 /* Since the maximum bytes produced by each loop is 20, we subtract 19 | 1756 /* Since the maximum bytes produced by each loop is 20, we subtract 19 |
| 1750 from DST_END to assure overflow checking is necessary only at the | 1757 from DST_END to assure overflow checking is necessary only at the |
| 1751 head of loop. */ | 1758 head of loop. */ |
| 1752 unsigned char *adjusted_dst_end = dst_end - 19; | 1759 unsigned char *adjusted_dst_end = dst_end - 19; |
| 1753 Lisp_Object unification_table | 1760 Lisp_Object translation_table |
| 1754 = coding->character_unification_table_for_encode; | 1761 = coding->character_translation_table_for_encode; |
| 1755 int result = CODING_FINISH_NORMAL; | 1762 int result = CODING_FINISH_NORMAL; |
| 1756 | 1763 |
| 1757 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 1764 if (!NILP (Venable_character_translation) && NILP (translation_table)) |
| 1758 unification_table = Vstandard_character_unification_table_for_encode; | 1765 translation_table = Vstandard_character_translation_table_for_encode; |
| 1759 | 1766 |
| 1760 coding->consumed_char = 0; | 1767 coding->consumed_char = 0; |
| 1761 coding->fake_multibyte = 0; | 1768 coding->fake_multibyte = 0; |
| 1762 while (src < src_end && (dst_bytes | 1769 while (src < src_end && (dst_bytes |
| 1763 ? (dst < adjusted_dst_end) | 1770 ? (dst < adjusted_dst_end) |
| 1773 | 1780 |
| 1774 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL | 1781 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL |
| 1775 && CODING_SPEC_ISO_BOL (coding)) | 1782 && CODING_SPEC_ISO_BOL (coding)) |
| 1776 { | 1783 { |
| 1777 /* We have to produce designation sequences if any now. */ | 1784 /* We have to produce designation sequences if any now. */ |
| 1778 encode_designation_at_bol (coding, unification_table, | 1785 encode_designation_at_bol (coding, translation_table, |
| 1779 src, src_end, &dst); | 1786 src, src_end, &dst); |
| 1780 CODING_SPEC_ISO_BOL (coding) = 0; | 1787 CODING_SPEC_ISO_BOL (coding) = 0; |
| 1781 } | 1788 } |
| 1782 | 1789 |
| 1783 c1 = *src++; | 1790 c1 = *src++; |
| 1948 | 1955 |
| 1949 /* If this is the last block of the text to be encoded, we must | 1956 /* If this is the last block of the text to be encoded, we must |
| 1950 reset graphic planes and registers to the initial state, and | 1957 reset graphic planes and registers to the initial state, and |
| 1951 flush out the carryover if any. */ | 1958 flush out the carryover if any. */ |
| 1952 if (coding->mode & CODING_MODE_LAST_BLOCK) | 1959 if (coding->mode & CODING_MODE_LAST_BLOCK) |
| 1953 ENCODE_RESET_PLANE_AND_REGISTER; | 1960 { |
| 1954 | 1961 ENCODE_RESET_PLANE_AND_REGISTER; |
| 1962 if (COMPOSING_P (coding->composing)) | |
| 1963 ENCODE_COMPOSITION_END; | |
| 1964 } | |
| 1955 coding->consumed = src - source; | 1965 coding->consumed = src - source; |
| 1956 coding->produced = coding->produced_char = dst - destination; | 1966 coding->produced = coding->produced_char = dst - destination; |
| 1957 return result; | 1967 return result; |
| 1958 } | 1968 } |
| 1959 | 1969 |
| 2033 } while (0) | 2043 } while (0) |
| 2034 | 2044 |
| 2035 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \ | 2045 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \ |
| 2036 do { \ | 2046 do { \ |
| 2037 int c_alt, charset_alt = (charset); \ | 2047 int c_alt, charset_alt = (charset); \ |
| 2038 if (!NILP (unification_table) \ | 2048 if (!NILP (translation_table) \ |
| 2039 && ((c_alt = unify_char (unification_table, \ | 2049 && ((c_alt = translate_char (translation_table, \ |
| 2040 -1, (charset), c1, c2)) >= 0)) \ | 2050 -1, (charset), c1, c2)) >= 0)) \ |
| 2041 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | 2051 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ |
| 2042 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ | 2052 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ |
| 2043 DECODE_CHARACTER_ASCII (c1); \ | 2053 DECODE_CHARACTER_ASCII (c1); \ |
| 2044 else if (CHARSET_DIMENSION (charset_alt) == 1) \ | 2054 else if (CHARSET_DIMENSION (charset_alt) == 1) \ |
| 2045 DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \ | 2055 DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \ |
| 2046 else \ | 2056 else \ |
| 2047 DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ | 2057 DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ |
| 2048 } while (0) | 2058 } while (0) |
| 2049 | 2059 |
| 2050 #define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \ | 2060 #define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \ |
| 2051 do { \ | 2061 do { \ |
| 2052 int c_alt, charset_alt; \ | 2062 int c_alt, charset_alt; \ |
| 2053 if (!NILP (unification_table) \ | 2063 if (!NILP (translation_table) \ |
| 2054 && ((c_alt = unify_char (unification_table, -1, charset, c1, c2)) \ | 2064 && ((c_alt = translate_char (translation_table, -1, \ |
| 2055 >= 0)) \ | 2065 charset, c1, c2)) \ |
| 2056 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | 2066 >= 0)) \ |
| 2057 else \ | 2067 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ |
| 2058 charset_alt = charset; \ | 2068 else \ |
| 2059 if (charset_alt == charset_ascii) \ | 2069 charset_alt = charset; \ |
| 2060 *dst++ = c1; \ | 2070 if (charset_alt == charset_ascii) \ |
| 2061 else if (CHARSET_DIMENSION (charset_alt) == 1) \ | 2071 *dst++ = c1; \ |
| 2062 { \ | 2072 else if (CHARSET_DIMENSION (charset_alt) == 1) \ |
| 2063 if (sjis_p && charset_alt == charset_katakana_jisx0201) \ | 2073 { \ |
| 2064 *dst++ = c1; \ | 2074 if (sjis_p && charset_alt == charset_katakana_jisx0201) \ |
| 2065 else \ | 2075 *dst++ = c1; \ |
| 2066 { \ | 2076 else \ |
| 2067 *dst++ = charset_alt, *dst++ = c1; \ | 2077 { \ |
| 2068 coding->fake_multibyte = 1; \ | 2078 *dst++ = charset_alt, *dst++ = c1; \ |
| 2069 } \ | 2079 coding->fake_multibyte = 1; \ |
| 2070 } \ | 2080 } \ |
| 2071 else \ | 2081 } \ |
| 2072 { \ | 2082 else \ |
| 2073 c1 &= 0x7F, c2 &= 0x7F; \ | 2083 { \ |
| 2074 if (sjis_p && charset_alt == charset_jisx0208) \ | 2084 c1 &= 0x7F, c2 &= 0x7F; \ |
| 2075 { \ | 2085 if (sjis_p && charset_alt == charset_jisx0208) \ |
| 2076 unsigned char s1, s2; \ | 2086 { \ |
| 2077 \ | 2087 unsigned char s1, s2; \ |
| 2078 ENCODE_SJIS (c1, c2, s1, s2); \ | 2088 \ |
| 2079 *dst++ = s1, *dst++ = s2; \ | 2089 ENCODE_SJIS (c1, c2, s1, s2); \ |
| 2080 coding->fake_multibyte = 1; \ | 2090 *dst++ = s1, *dst++ = s2; \ |
| 2081 } \ | 2091 coding->fake_multibyte = 1; \ |
| 2082 else if (!sjis_p \ | 2092 } \ |
| 2083 && (charset_alt == charset_big5_1 \ | 2093 else if (!sjis_p \ |
| 2084 || charset_alt == charset_big5_2)) \ | 2094 && (charset_alt == charset_big5_1 \ |
| 2085 { \ | 2095 || charset_alt == charset_big5_2)) \ |
| 2086 unsigned char b1, b2; \ | 2096 { \ |
| 2087 \ | 2097 unsigned char b1, b2; \ |
| 2088 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ | 2098 \ |
| 2089 *dst++ = b1, *dst++ = b2; \ | 2099 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ |
| 2090 } \ | 2100 *dst++ = b1, *dst++ = b2; \ |
| 2091 else \ | 2101 } \ |
| 2092 { \ | 2102 else \ |
| 2093 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | 2103 { \ |
| 2094 coding->fake_multibyte = 1; \ | 2104 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ |
| 2095 } \ | 2105 coding->fake_multibyte = 1; \ |
| 2096 } \ | 2106 } \ |
| 2097 coding->consumed_char++; \ | 2107 } \ |
| 2108 coding->consumed_char++; \ | |
| 2098 } while (0); | 2109 } while (0); |
| 2099 | 2110 |
| 2100 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2111 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 2101 Check if a text is encoded in SJIS. If it is, return | 2112 Check if a text is encoded in SJIS. If it is, return |
| 2102 CODING_CATEGORY_MASK_SJIS, else return 0. */ | 2113 CODING_CATEGORY_MASK_SJIS, else return 0. */ |
| 2161 unsigned char *dst_end = destination + dst_bytes; | 2172 unsigned char *dst_end = destination + dst_bytes; |
| 2162 /* Since the maximum bytes produced by each loop is 4, we subtract 3 | 2173 /* Since the maximum bytes produced by each loop is 4, we subtract 3 |
| 2163 from DST_END to assure overflow checking is necessary only at the | 2174 from DST_END to assure overflow checking is necessary only at the |
| 2164 head of loop. */ | 2175 head of loop. */ |
| 2165 unsigned char *adjusted_dst_end = dst_end - 3; | 2176 unsigned char *adjusted_dst_end = dst_end - 3; |
| 2166 Lisp_Object unification_table | 2177 Lisp_Object translation_table |
| 2167 = coding->character_unification_table_for_decode; | 2178 = coding->character_translation_table_for_decode; |
| 2168 int result = CODING_FINISH_NORMAL; | 2179 int result = CODING_FINISH_NORMAL; |
| 2169 | 2180 |
| 2170 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 2181 if (!NILP (Venable_character_translation) && NILP (translation_table)) |
| 2171 unification_table = Vstandard_character_unification_table_for_decode; | 2182 translation_table = Vstandard_character_translation_table_for_decode; |
| 2172 | 2183 |
| 2173 coding->produced_char = 0; | 2184 coding->produced_char = 0; |
| 2174 coding->fake_multibyte = 0; | 2185 coding->fake_multibyte = 0; |
| 2175 while (src < src_end && (dst_bytes | 2186 while (src < src_end && (dst_bytes |
| 2176 ? (dst < adjusted_dst_end) | 2187 ? (dst < adjusted_dst_end) |
| 2350 unsigned char *dst_end = destination + dst_bytes; | 2361 unsigned char *dst_end = destination + dst_bytes; |
| 2351 /* Since the maximum bytes produced by each loop is 2, we subtract 1 | 2362 /* Since the maximum bytes produced by each loop is 2, we subtract 1 |
| 2352 from DST_END to assure overflow checking is necessary only at the | 2363 from DST_END to assure overflow checking is necessary only at the |
| 2353 head of loop. */ | 2364 head of loop. */ |
| 2354 unsigned char *adjusted_dst_end = dst_end - 1; | 2365 unsigned char *adjusted_dst_end = dst_end - 1; |
| 2355 Lisp_Object unification_table | 2366 Lisp_Object translation_table |
| 2356 = coding->character_unification_table_for_encode; | 2367 = coding->character_translation_table_for_encode; |
| 2357 int result = CODING_FINISH_NORMAL; | 2368 int result = CODING_FINISH_NORMAL; |
| 2358 | 2369 |
| 2359 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 2370 if (!NILP (Venable_character_translation) && NILP (translation_table)) |
| 2360 unification_table = Vstandard_character_unification_table_for_encode; | 2371 translation_table = Vstandard_character_translation_table_for_encode; |
| 2361 | 2372 |
| 2362 coding->consumed_char = 0; | 2373 coding->consumed_char = 0; |
| 2363 coding->fake_multibyte = 0; | 2374 coding->fake_multibyte = 0; |
| 2364 while (src < src_end && (dst_bytes | 2375 while (src < src_end && (dst_bytes |
| 2365 ? (dst < adjusted_dst_end) | 2376 ? (dst < adjusted_dst_end) |
| 2803 return 0; | 2814 return 0; |
| 2804 } | 2815 } |
| 2805 | 2816 |
| 2806 /* Initialize remaining fields. */ | 2817 /* Initialize remaining fields. */ |
| 2807 coding->composing = 0; | 2818 coding->composing = 0; |
| 2808 coding->character_unification_table_for_decode = Qnil; | 2819 coding->character_translation_table_for_decode = Qnil; |
| 2809 coding->character_unification_table_for_encode = Qnil; | 2820 coding->character_translation_table_for_encode = Qnil; |
| 2810 | 2821 |
| 2811 /* Get values of coding system properties: | 2822 /* Get values of coding system properties: |
| 2812 `post-read-conversion', `pre-write-conversion', | 2823 `post-read-conversion', `pre-write-conversion', |
| 2813 `character-unification-table-for-decode', | 2824 `character-translation-table-for-decode', |
| 2814 `character-unification-table-for-encode'. */ | 2825 `character-translation-table-for-encode'. */ |
| 2815 plist = XVECTOR (coding_spec)->contents[3]; | 2826 plist = XVECTOR (coding_spec)->contents[3]; |
| 2816 coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion); | 2827 coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion); |
| 2817 coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion); | 2828 coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion); |
| 2818 val = Fplist_get (plist, Qcharacter_unification_table_for_decode); | 2829 val = Fplist_get (plist, Qcharacter_translation_table_for_decode); |
| 2819 if (SYMBOLP (val)) | 2830 if (SYMBOLP (val)) |
| 2820 val = Fget (val, Qcharacter_unification_table_for_decode); | 2831 val = Fget (val, Qcharacter_translation_table_for_decode); |
| 2821 coding->character_unification_table_for_decode | 2832 coding->character_translation_table_for_decode |
| 2822 = CHAR_TABLE_P (val) ? val : Qnil; | 2833 = CHAR_TABLE_P (val) ? val : Qnil; |
| 2823 val = Fplist_get (plist, Qcharacter_unification_table_for_encode); | 2834 val = Fplist_get (plist, Qcharacter_translation_table_for_encode); |
| 2824 if (SYMBOLP (val)) | 2835 if (SYMBOLP (val)) |
| 2825 val = Fget (val, Qcharacter_unification_table_for_encode); | 2836 val = Fget (val, Qcharacter_translation_table_for_encode); |
| 2826 coding->character_unification_table_for_encode | 2837 coding->character_translation_table_for_encode |
| 2827 = CHAR_TABLE_P (val) ? val : Qnil; | 2838 = CHAR_TABLE_P (val) ? val : Qnil; |
| 2828 val = Fplist_get (plist, Qcoding_category); | 2839 val = Fplist_get (plist, Qcoding_category); |
| 2829 if (!NILP (val)) | 2840 if (!NILP (val)) |
| 2830 { | 2841 { |
| 2831 val = Fget (val, Qcoding_category_index); | 2842 val = Fget (val, Qcoding_category_index); |
| 5127 Fput (XVECTOR (Vcoding_category_table)->contents[i], | 5138 Fput (XVECTOR (Vcoding_category_table)->contents[i], |
| 5128 Qcoding_category_index, make_number (i)); | 5139 Qcoding_category_index, make_number (i)); |
| 5129 } | 5140 } |
| 5130 } | 5141 } |
| 5131 | 5142 |
| 5132 Qcharacter_unification_table = intern ("character-unification-table"); | 5143 Qcharacter_translation_table = intern ("character-translation-table"); |
| 5133 staticpro (&Qcharacter_unification_table); | 5144 staticpro (&Qcharacter_translation_table); |
| 5134 Fput (Qcharacter_unification_table, Qchar_table_extra_slots, | 5145 Fput (Qcharacter_translation_table, Qchar_table_extra_slots, |
| 5135 make_number (0)); | 5146 make_number (0)); |
| 5136 | 5147 |
| 5137 Qcharacter_unification_table_for_decode | 5148 Qcharacter_translation_table_id = intern ("character-translation-table-id"); |
| 5138 = intern ("character-unification-table-for-decode"); | 5149 staticpro (&Qcharacter_translation_table_id); |
| 5139 staticpro (&Qcharacter_unification_table_for_decode); | 5150 |
| 5140 | 5151 Qcharacter_translation_table_for_decode |
| 5141 Qcharacter_unification_table_for_encode | 5152 = intern ("character-translation-table-for-decode"); |
| 5142 = intern ("character-unification-table-for-encode"); | 5153 staticpro (&Qcharacter_translation_table_for_decode); |
| 5143 staticpro (&Qcharacter_unification_table_for_encode); | 5154 |
| 5155 Qcharacter_translation_table_for_encode | |
| 5156 = intern ("character-translation-table-for-encode"); | |
| 5157 staticpro (&Qcharacter_translation_table_for_encode); | |
| 5144 | 5158 |
| 5145 Qsafe_charsets = intern ("safe-charsets"); | 5159 Qsafe_charsets = intern ("safe-charsets"); |
| 5146 staticpro (&Qsafe_charsets); | 5160 staticpro (&Qsafe_charsets); |
| 5147 | 5161 |
| 5148 Qemacs_mule = intern ("emacs-mule"); | 5162 Qemacs_mule = intern ("emacs-mule"); |
| 5295 | 5309 |
| 5296 DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided, | 5310 DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided, |
| 5297 "Mnemonic character indicating end-of-line format is not yet decided."); | 5311 "Mnemonic character indicating end-of-line format is not yet decided."); |
| 5298 eol_mnemonic_undecided = ':'; | 5312 eol_mnemonic_undecided = ':'; |
| 5299 | 5313 |
| 5300 DEFVAR_LISP ("enable-character-unification", &Venable_character_unification, | 5314 DEFVAR_LISP ("enable-character-translation", &Venable_character_translation, |
| 5301 "Non-nil means ISO 2022 encoder/decoder do character unification."); | 5315 "Non-nil means ISO 2022 encoder/decoder do character translation."); |
| 5302 Venable_character_unification = Qt; | 5316 Venable_character_translation = Qt; |
| 5303 | 5317 |
| 5304 DEFVAR_LISP ("standard-character-unification-table-for-decode", | 5318 DEFVAR_LISP ("standard-character-translation-table-for-decode", |
| 5305 &Vstandard_character_unification_table_for_decode, | 5319 &Vstandard_character_translation_table_for_decode, |
| 5306 "Table for unifying characters when reading."); | 5320 "Table for translating characters while decoding."); |
| 5307 Vstandard_character_unification_table_for_decode = Qnil; | 5321 Vstandard_character_translation_table_for_decode = Qnil; |
| 5308 | 5322 |
| 5309 DEFVAR_LISP ("standard-character-unification-table-for-encode", | 5323 DEFVAR_LISP ("standard-character-translation-table-for-encode", |
| 5310 &Vstandard_character_unification_table_for_encode, | 5324 &Vstandard_character_translation_table_for_encode, |
| 5311 "Table for unifying characters when writing."); | 5325 "Table for translationg characters while encoding."); |
| 5312 Vstandard_character_unification_table_for_encode = Qnil; | 5326 Vstandard_character_translation_table_for_encode = Qnil; |
| 5313 | 5327 |
| 5314 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist, | 5328 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist, |
| 5315 "Alist of charsets vs revision numbers.\n\ | 5329 "Alist of charsets vs revision numbers.\n\ |
| 5316 While encoding, if a charset (car part of an element) is found,\n\ | 5330 While encoding, if a charset (car part of an element) is found,\n\ |
| 5317 designate it with the escape sequence identifing revision (cdr part of the element)."); | 5331 designate it with the escape sequence identifing revision (cdr part of the element)."); |
