Mercurial > emacs
comparison src/coding.c @ 89764:e3d26232ab78
(decode_coding_sjis): Handle 4th charset (typically JISX0212).
(encode_coding_sjis): Likewise.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Tue, 27 Jan 2004 12:11:41 +0000 |
| parents | d11866e9fbf4 |
| children | 74c503490829 |
comparison
equal
deleted
inserted
replaced
| 89763:82bfb1c12ea2 | 89764:e3d26232ab78 |
|---|---|
| 3996 int *charbuf = coding->charbuf; | 3996 int *charbuf = coding->charbuf; |
| 3997 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; | 3997 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
| 3998 int consumed_chars = 0, consumed_chars_base; | 3998 int consumed_chars = 0, consumed_chars_base; |
| 3999 int multibytep = coding->src_multibyte; | 3999 int multibytep = coding->src_multibyte; |
| 4000 struct charset *charset_roman, *charset_kanji, *charset_kana; | 4000 struct charset *charset_roman, *charset_kanji, *charset_kana; |
| | 4001 struct charset *charset_kanji2; |
| 4001 Lisp_Object attrs, charset_list, val; | 4002 Lisp_Object attrs, charset_list, val; |
| 4002 int char_offset = coding->produced_char; | 4003 int char_offset = coding->produced_char; |
| 4003 int last_offset = char_offset; | 4004 int last_offset = char_offset; |
| 4004 int last_id = charset_ascii; | 4005 int last_id = charset_ascii; |
| 4005 | 4006 |
| 4006 CODING_GET_INFO (coding, attrs, charset_list); | 4007 CODING_GET_INFO (coding, attrs, charset_list); |
| 4007 | 4008 |
| 4008 val = charset_list; | 4009 val = charset_list; |
| 4009 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4010 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4010 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4011 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4011 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); | 4012 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| | 4013 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val))); |
| 4012 | 4014 |
| 4013 while (1) | 4015 while (1) |
| 4014 { | 4016 { |
| 4015 int c, c1; | 4017 int c, c1; |
| 4016 struct charset *charset; | 4018 struct charset *charset; |
| 4024 ONE_MORE_BYTE (c); | 4026 ONE_MORE_BYTE (c); |
| 4025 if (c < 0) | 4027 if (c < 0) |
| 4026 goto invalid_code; | 4028 goto invalid_code; |
| 4027 if (c < 0x80) | 4029 if (c < 0x80) |
| 4028 charset = charset_roman; | 4030 charset = charset_roman; |
| 4029 else if (c == 0x80) | 4031 else if (c == 0x80 || c == 0xA0) |
| 4030 goto invalid_code; | 4032 goto invalid_code; |
| | 4033 else if (c >= 0xA1 && c <= 0xDF) |
| | 4034 { |
| | 4035 /* SJIS -> JISX0201-Kana */ |
| | 4036 c &= 0x7F; |
| | 4037 charset = charset_kana; |
| | 4038 } |
| | 4039 else if (c <= 0xEF) |
| | 4040 { |
| | 4041 /* SJIS -> JISX0208 */ |
| | 4042 ONE_MORE_BYTE (c1); |
| | 4043 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) |
| | 4044 goto invalid_code; |
| | 4045 c = (c << 8) | c1; |
| | 4046 SJIS_TO_JIS (c); |
| | 4047 charset = charset_kanji; |
| | 4048 } |
| | 4049 else if (c <= 0xFC && charset_kanji2) |
| | 4050 { |
| | 4051 /* SJIS -> JISX0212 */ |
| | 4052 ONE_MORE_BYTE (c1); |
| | 4053 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) |
| | 4054 goto invalid_code; |
| | 4055 c = (c << 8) | c1; |
| | 4056 SJIS_TO_JIS2 (c); |
| | 4057 charset = charset_kanji2; |
| | 4058 } |
| 4031 else | 4059 else |
| 4032 { | 4060 goto invalid_code; |
| 4033 if (c >= 0xF0) | |
| 4034 goto invalid_code; | |
| 4035 if (c < 0xA0 || c >= 0xE0) | |
| 4036 { | |
| 4037 /* SJIS -> JISX0208 */ | |
| 4038 ONE_MORE_BYTE (c1); | |
| 4039 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) | |
| 4040 goto invalid_code; | |
| 4041 c = (c << 8) | c1; | |
| 4042 SJIS_TO_JIS (c); | |
| 4043 charset = charset_kanji; | |
| 4044 } | |
| 4045 else if (c > 0xA0) | |
| 4046 { | |
| 4047 /* SJIS -> JISX0201-Kana */ | |
| 4048 c &= 0x7F; | |
| 4049 charset = charset_kana; | |
| 4050 } | |
| 4051 else | |
| 4052 goto invalid_code; | |
| 4053 } | |
| 4054 if (charset->id != charset_ascii | 4061 if (charset->id != charset_ascii |
| 4055 && last_id != charset->id) | 4062 && last_id != charset->id) |
| 4056 { | 4063 { |
| 4057 if (last_id != charset_ascii) | 4064 if (last_id != charset_ascii) |
| 4058 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | 4065 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); |
| 4181 int safe_room = 4; | 4188 int safe_room = 4; |
| 4182 int produced_chars = 0; | 4189 int produced_chars = 0; |
| 4183 Lisp_Object attrs, charset_list, val; | 4190 Lisp_Object attrs, charset_list, val; |
| 4184 int ascii_compatible; | 4191 int ascii_compatible; |
| 4185 struct charset *charset_roman, *charset_kanji, *charset_kana; | 4192 struct charset *charset_roman, *charset_kanji, *charset_kana; |
| | 4193 struct charset *charset_kanji2; |
| 4186 int c; | 4194 int c; |
| 4187 | 4195 |
| 4188 CODING_GET_INFO (coding, attrs, charset_list); | 4196 CODING_GET_INFO (coding, attrs, charset_list); |
| 4189 val = charset_list; | 4197 val = charset_list; |
| 4190 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4198 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4191 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4199 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4192 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); | 4200 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| | 4201 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val))); |
| 4193 | 4202 |
| 4194 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); | 4203 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); |
| 4195 | 4204 |
| 4196 while (charbuf < charbuf_end) | 4205 while (charbuf < charbuf_end) |
| 4197 { | 4206 { |
| 4232 c1 = code >> 8, c2 = code & 0xFF; | 4241 c1 = code >> 8, c2 = code & 0xFF; |
| 4233 EMIT_TWO_BYTES (c1, c2); | 4242 EMIT_TWO_BYTES (c1, c2); |
| 4234 } | 4243 } |
| 4235 else if (charset == charset_kana) | 4244 else if (charset == charset_kana) |
| 4236 EMIT_ONE_BYTE (code | 0x80); | 4245 EMIT_ONE_BYTE (code | 0x80); |
| | 4246 else if (charset_kanji2 && charset == charset_kanji2) |
| | 4247 { |
| | 4248 int c1, c2; |
| | 4249 |
| | 4250 c1 = code >> 8; |
| | 4251 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25) |
| | 4252 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E) |
| | 4253 { |
| | 4254 JIS_TO_SJIS2 (code); |
| | 4255 c1 = code >> 8, c2 = code & 0xFF; |
| | 4256 EMIT_TWO_BYTES (c1, c2); |
| | 4257 } |
| | 4258 else |
| | 4259 EMIT_ONE_ASCII_BYTE (code & 0x7F); |
| | 4260 } |
| 4237 else | 4261 else |
| 4238 EMIT_ONE_ASCII_BYTE (code & 0x7F); | 4262 EMIT_ONE_ASCII_BYTE (code & 0x7F); |
| 4239 } | 4263 } |
| 4240 } | 4264 } |
| 4241 record_conversion_result (coding, CODING_RESULT_SUCCESS); | 4265 record_conversion_result (coding, CODING_RESULT_SUCCESS); |
