Mercurial > emacs
comparison src/coding.c @ 22616:c493ce6a31e4
(setup_raw_text_coding_system): New function.
(decode_coding_sjis_big5): Bug for handling invalid code fixed.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Fri, 26 Jun 1998 03:29:15 +0000 |
| parents | d9eac134a41b |
| children | 3e3949ac4cfb |
comparison
equal
deleted
inserted
replaced
| 22615:bad8b17b0524 | 22616:c493ce6a31e4 |
|---|---|
| 1988 | 1988 |
| 1989 --- CODE RANGE of SJIS --- | 1989 --- CODE RANGE of SJIS --- |
| 1990 (character set) (range) | 1990 (character set) (range) |
| 1991 ASCII 0x00 .. 0x7F | 1991 ASCII 0x00 .. 0x7F |
| 1992 KATAKANA-JISX0201 0xA0 .. 0xDF | 1992 KATAKANA-JISX0201 0xA0 .. 0xDF |
| 1993 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xFF | 1993 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF |
| 1994 (2nd byte) 0x40 .. 0xFF | 1994 (2nd byte) 0x40 .. 0xFF |
| 1995 ------------------------------- | 1995 ------------------------------- |
| 1996 | 1996 |
| 1997 */ | 1997 */ |
| 1998 | 1998 |
| 2234 *dst++ = c1; | 2234 *dst++ = c1; |
| 2235 coding->produced_char++; | 2235 coding->produced_char++; |
| 2236 } | 2236 } |
| 2237 else if (c1 < 0x80) | 2237 else if (c1 < 0x80) |
| 2238 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | 2238 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); |
| 2239 else if (c1 < 0xA0) | 2239 else |
| 2240 { | 2240 { |
| 2241 /* SJIS -> JISX0208 */ | |
| 2242 if (sjis_p) | 2241 if (sjis_p) |
| 2243 { | 2242 { |
| 2244 ONE_MORE_BYTE (c2); | 2243 if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0)) |
| 2245 if (c2 >= 0x40) | |
| 2246 { | 2244 { |
| 2247 DECODE_SJIS (c1, c2, c3, c4); | 2245 /* SJIS -> JISX0208 */ |
| 2248 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | 2246 ONE_MORE_BYTE (c2); |
| 2247 if (c2 >= 0x40) | |
| 2248 { | |
| 2249 DECODE_SJIS (c1, c2, c3, c4); | |
| 2250 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
| 2251 } | |
| 2252 else | |
| 2253 goto label_invalid_code_2; | |
| 2249 } | 2254 } |
| 2255 else if (c1 < 0xE0) | |
| 2256 /* SJIS -> JISX0201-Kana */ | |
| 2257 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | |
| 2258 /* dummy */ c2); | |
| 2250 else | 2259 else |
| 2251 goto label_invalid_code_2; | 2260 goto label_invalid_code_1; |
| 2252 } | |
| 2253 else | |
| 2254 goto label_invalid_code_1; | |
| 2255 } | |
| 2256 else if (c1 < 0xE0) | |
| 2257 { | |
| 2258 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ | |
| 2259 if (sjis_p) | |
| 2260 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | |
| 2261 /* dummy */ c2); | |
| 2262 else | |
| 2263 { | |
| 2264 int charset; | |
| 2265 | |
| 2266 ONE_MORE_BYTE (c2); | |
| 2267 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | |
| 2268 { | |
| 2269 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
| 2270 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
| 2271 } | |
| 2272 else | |
| 2273 goto label_invalid_code_2; | |
| 2274 } | |
| 2275 } | |
| 2276 else /* C1 >= 0xE0 */ | |
| 2277 { | |
| 2278 /* SJIS -> JISX0208, BIG5 -> Big5 */ | |
| 2279 if (sjis_p) | |
| 2280 { | |
| 2281 ONE_MORE_BYTE (c2); | |
| 2282 if (c2 >= 0x40) | |
| 2283 { | |
| 2284 DECODE_SJIS (c1, c2, c3, c4); | |
| 2285 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
| 2286 } | |
| 2287 else | |
| 2288 goto label_invalid_code_2; | |
| 2289 } | 2261 } |
| 2290 else | 2262 else |
| 2291 { | 2263 { |
| 2292 int charset; | 2264 /* BIG5 -> Big5 */ |
| 2293 | 2265 if (c1 >= 0xA1 && c1 <= 0xFE) |
| 2294 ONE_MORE_BYTE (c2); | |
| 2295 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | |
| 2296 { | 2266 { |
| 2297 DECODE_BIG5 (c1, c2, charset, c3, c4); | 2267 ONE_MORE_BYTE (c2); |
| 2298 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2268 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) |
| 2269 { | |
| 2270 int charset; | |
| 2271 | |
| 2272 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
| 2273 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
| 2274 } | |
| 2275 else | |
| 2276 goto label_invalid_code_2; | |
| 2299 } | 2277 } |
| 2300 else | 2278 else |
| 2301 goto label_invalid_code_2; | 2279 goto label_invalid_code_1; |
| 2302 } | 2280 } |
| 2303 } | 2281 } |
| 2304 continue; | 2282 continue; |
| 2305 | 2283 |
| 2306 label_invalid_code_1: | 2284 label_invalid_code_1: |
| 3083 coding->category_idx = CODING_CATEGORY_IDX_BINARY; | 3061 coding->category_idx = CODING_CATEGORY_IDX_BINARY; |
| 3084 coding->common_flags = 0; | 3062 coding->common_flags = 0; |
| 3085 coding->eol_type = CODING_EOL_LF; | 3063 coding->eol_type = CODING_EOL_LF; |
| 3086 coding->pre_write_conversion = coding->post_read_conversion = Qnil; | 3064 coding->pre_write_conversion = coding->post_read_conversion = Qnil; |
| 3087 return -1; | 3065 return -1; |
| 3066 } | |
| 3067 | |
| 3068 /* Setup raw-text or one of its subsidiaries in the structure | |
| 3069 coding_system CODING according to the already setup value eol_type | |
| 3070 in CODING. CODING should be setup for some coding system in | |
| 3071 advance. */ | |
| 3072 | |
| 3073 void | |
| 3074 setup_raw_text_coding_system (coding) | |
| 3075 struct coding_system *coding; | |
| 3076 { | |
| 3077 if (coding->type != coding_type_raw_text) | |
| 3078 { | |
| 3079 coding->symbol = Qraw_text; | |
| 3080 coding->type = coding_type_raw_text; | |
| 3081 if (coding->eol_type != CODING_EOL_UNDECIDED) | |
| 3082 { | |
| 3083 Lisp_Object subsidiaries = Fget (Qraw_text, Qeol_type); | |
| 3084 | |
| 3085 if (VECTORP (subsidiaries) | |
| 3086 && XVECTOR (subsidiaries)->size == 3) | |
| 3087 coding->symbol | |
| 3088 = XVECTOR (subsidiaries)->contents[coding->eol_type]; | |
| 3089 } | |
| 3090 } | |
| 3091 return; | |
| 3088 } | 3092 } |
| 3089 | 3093 |
| 3090 /* Emacs has a mechanism to automatically detect a coding system if it | 3094 /* Emacs has a mechanism to automatically detect a coding system if it |
| 3091 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, | 3095 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, |
| 3092 it's impossible to distinguish some coding systems accurately | 3096 it's impossible to distinguish some coding systems accurately |
