comparison src/coding.c @ 89764:e3d26232ab78

(decode_coding_sjis): Handle 4th charset (typically JISX0212). (encode_coding_sjis): Likewise.
author Kenichi Handa <handa@m17n.org>
date Tue, 27 Jan 2004 12:11:41 +0000
parents d11866e9fbf4
children 74c503490829
comparison
equal deleted inserted replaced
89763:82bfb1c12ea2 89764:e3d26232ab78
3996 int *charbuf = coding->charbuf; 3996 int *charbuf = coding->charbuf;
3997 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 3997 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3998 int consumed_chars = 0, consumed_chars_base; 3998 int consumed_chars = 0, consumed_chars_base;
3999 int multibytep = coding->src_multibyte; 3999 int multibytep = coding->src_multibyte;
4000 struct charset *charset_roman, *charset_kanji, *charset_kana; 4000 struct charset *charset_roman, *charset_kanji, *charset_kana;
4001 struct charset *charset_kanji2;
4001 Lisp_Object attrs, charset_list, val; 4002 Lisp_Object attrs, charset_list, val;
4002 int char_offset = coding->produced_char; 4003 int char_offset = coding->produced_char;
4003 int last_offset = char_offset; 4004 int last_offset = char_offset;
4004 int last_id = charset_ascii; 4005 int last_id = charset_ascii;
4005 4006
4006 CODING_GET_INFO (coding, attrs, charset_list); 4007 CODING_GET_INFO (coding, attrs, charset_list);
4007 4008
4008 val = charset_list; 4009 val = charset_list;
4009 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4010 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4010 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4011 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4011 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); 4012 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4013 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4012 4014
4013 while (1) 4015 while (1)
4014 { 4016 {
4015 int c, c1; 4017 int c, c1;
4016 struct charset *charset; 4018 struct charset *charset;
4024 ONE_MORE_BYTE (c); 4026 ONE_MORE_BYTE (c);
4025 if (c < 0) 4027 if (c < 0)
4026 goto invalid_code; 4028 goto invalid_code;
4027 if (c < 0x80) 4029 if (c < 0x80)
4028 charset = charset_roman; 4030 charset = charset_roman;
4029 else if (c == 0x80) 4031 else if (c == 0x80 || c == 0xA0)
4030 goto invalid_code; 4032 goto invalid_code;
4033 else if (c >= 0xA1 && c <= 0xDF)
4034 {
4035 /* SJIS -> JISX0201-Kana */
4036 c &= 0x7F;
4037 charset = charset_kana;
4038 }
4039 else if (c <= 0xEF)
4040 {
4041 /* SJIS -> JISX0208 */
4042 ONE_MORE_BYTE (c1);
4043 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4044 goto invalid_code;
4045 c = (c << 8) | c1;
4046 SJIS_TO_JIS (c);
4047 charset = charset_kanji;
4048 }
4049 else if (c <= 0xFC && charset_kanji2)
4050 {
4051 /* SJIS -> JISX0212 */
4052 ONE_MORE_BYTE (c1);
4053 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4054 goto invalid_code;
4055 c = (c << 8) | c1;
4056 SJIS_TO_JIS2 (c);
4057 charset = charset_kanji2;
4058 }
4031 else 4059 else
4032 { 4060 goto invalid_code;
4033 if (c >= 0xF0)
4034 goto invalid_code;
4035 if (c < 0xA0 || c >= 0xE0)
4036 {
4037 /* SJIS -> JISX0208 */
4038 ONE_MORE_BYTE (c1);
4039 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4040 goto invalid_code;
4041 c = (c << 8) | c1;
4042 SJIS_TO_JIS (c);
4043 charset = charset_kanji;
4044 }
4045 else if (c > 0xA0)
4046 {
4047 /* SJIS -> JISX0201-Kana */
4048 c &= 0x7F;
4049 charset = charset_kana;
4050 }
4051 else
4052 goto invalid_code;
4053 }
4054 if (charset->id != charset_ascii 4061 if (charset->id != charset_ascii
4055 && last_id != charset->id) 4062 && last_id != charset->id)
4056 { 4063 {
4057 if (last_id != charset_ascii) 4064 if (last_id != charset_ascii)
4058 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 4065 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4181 int safe_room = 4; 4188 int safe_room = 4;
4182 int produced_chars = 0; 4189 int produced_chars = 0;
4183 Lisp_Object attrs, charset_list, val; 4190 Lisp_Object attrs, charset_list, val;
4184 int ascii_compatible; 4191 int ascii_compatible;
4185 struct charset *charset_roman, *charset_kanji, *charset_kana; 4192 struct charset *charset_roman, *charset_kanji, *charset_kana;
4193 struct charset *charset_kanji2;
4186 int c; 4194 int c;
4187 4195
4188 CODING_GET_INFO (coding, attrs, charset_list); 4196 CODING_GET_INFO (coding, attrs, charset_list);
4189 val = charset_list; 4197 val = charset_list;
4190 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4198 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4191 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4199 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4192 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); 4200 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4201 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4193 4202
4194 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); 4203 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4195 4204
4196 while (charbuf < charbuf_end) 4205 while (charbuf < charbuf_end)
4197 { 4206 {
4232 c1 = code >> 8, c2 = code & 0xFF; 4241 c1 = code >> 8, c2 = code & 0xFF;
4233 EMIT_TWO_BYTES (c1, c2); 4242 EMIT_TWO_BYTES (c1, c2);
4234 } 4243 }
4235 else if (charset == charset_kana) 4244 else if (charset == charset_kana)
4236 EMIT_ONE_BYTE (code | 0x80); 4245 EMIT_ONE_BYTE (code | 0x80);
4246 else if (charset_kanji2 && charset == charset_kanji2)
4247 {
4248 int c1, c2;
4249
4250 c1 = code >> 8;
4251 if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4252 || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4253 {
4254 JIS_TO_SJIS2 (code);
4255 c1 = code >> 8, c2 = code & 0xFF;
4256 EMIT_TWO_BYTES (c1, c2);
4257 }
4258 else
4259 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4260 }
4237 else 4261 else
4238 EMIT_ONE_ASCII_BYTE (code & 0x7F); 4262 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4239 } 4263 }
4240 } 4264 }
4241 record_conversion_result (coding, CODING_RESULT_SUCCESS); 4265 record_conversion_result (coding, CODING_RESULT_SUCCESS);