Mercurial > emacs
comparison src/coding.c @ 17320:9d15bec5f47e
(detect_coding_iso2022, detect_coding_mask): Ignore
invalid ESC sequence for ISO 2022.
(Fencode_sjis_char, Fencode_big5_char): Adjusted for the change of
SPLIT_CHAR.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 07 Apr 1997 07:12:13 +0000 |
| parents | ecacd8936926 |
| children | 265aa4625114 |
Comparison legend: equal | deleted | inserted | replaced
| 17319:a58d6ceeb370 | 17320:9d15bec5f47e |
|---|---|
| 579 | 579 |
| 580 int | 580 int |
| 581 detect_coding_iso2022 (src, src_end) | 581 detect_coding_iso2022 (src, src_end) |
| 582 unsigned char *src, *src_end; | 582 unsigned char *src, *src_end; |
| 583 { | 583 { |
| 584 unsigned char c, g1 = 0; | 584 int mask = CODING_CATEGORY_MASK_ANY; |
| 585 int mask = (CODING_CATEGORY_MASK_ISO_7 | 585 int g1 = 0; /* 1 iff designating to G1. */ |
| 586 | CODING_CATEGORY_MASK_ISO_8_1 | 586 int c, i; |
| 587 | CODING_CATEGORY_MASK_ISO_8_2); | |
| 588 /* We may look ahead at most 4 bytes. */ | |
| 589 unsigned char *adjusted_src_end = src_end - 4; | |
| 590 int i; | |
| 591 | 587 |
| 592 while (src < src_end) | 588 while (src < src_end) |
| 593 { | 589 { |
| 594 c = *src++; | 590 c = *src++; |
| 595 switch (c) | 591 switch (c) |
| 596 { | 592 { |
| 597 case ISO_CODE_ESC: | 593 case ISO_CODE_ESC: |
| 598 if (src >= src_end) | 594 if (src >= src_end) |
| 599 break; | 595 break; |
| 600 c = *src++; | 596 c = *src++; |
| 601 if (src + 2 >= src_end | 597 if (src < src_end |
| 602 && ((c >= '(' && c <= '/') | 598 && ((c >= '(' && c <= '/') |
| 603 || c == '$' && ((*src >= '(' && *src <= '/') | 599 || c == '$' && ((*src >= '(' && *src <= '/') |
| 604 || (*src >= '@' && *src <= 'B')))) | 600 || (*src >= '@' && *src <= 'B')))) |
| 605 { | 601 { |
| 606 /* Valid designation sequence. */ | 602 /* Valid designation sequence. */ |
| 603 mask &= (CODING_CATEGORY_MASK_ISO_7 | |
| 604 | CODING_CATEGORY_MASK_ISO_8_1 | |
| 605 | CODING_CATEGORY_MASK_ISO_8_2 | |
| 606 | CODING_CATEGORY_MASK_ISO_ELSE); | |
| 607 if (c == ')' || (c == '$' && *src == ')')) | 607 if (c == ')' || (c == '$' && *src == ')')) |
| 608 g1 = 1; | 608 { |
| 609 g1 = 1; | |
| 610 mask &= ~CODING_CATEGORY_MASK_ISO_7; | |
| 611 } | |
| 609 src++; | 612 src++; |
| 610 break; | 613 break; |
| 611 } | 614 } |
| 612 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') | 615 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') |
| 613 return CODING_CATEGORY_MASK_ISO_ELSE; | 616 return CODING_CATEGORY_MASK_ISO_ELSE; |
| 2360 unsigned char *src_end = src + src_bytes; | 2363 unsigned char *src_end = src + src_bytes; |
| 2361 int mask; | 2364 int mask; |
| 2362 | 2365 |
| 2363 /* At first, skip all ASCII characters and control characters except | 2366 /* At first, skip all ASCII characters and control characters except |
| 2364 for three ISO2022 specific control characters. */ | 2367 for three ISO2022 specific control characters. */ |
| 2368 label_loop_detect_coding: | |
| 2365 while (src < src_end) | 2369 while (src < src_end) |
| 2366 { | 2370 { |
| 2367 c = *src; | 2371 c = *src; |
| 2368 if (c >= 0x80 | 2372 if (c >= 0x80 |
| 2369 || (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)) | 2373 || (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)) |
| 2376 return CODING_CATEGORY_MASK_ANY; | 2380 return CODING_CATEGORY_MASK_ANY; |
| 2377 | 2381 |
| 2378 /* The text seems to be encoded in some multilingual coding system. | 2382 /* The text seems to be encoded in some multilingual coding system. |
| 2379 Now, try to find in which coding system the text is encoded. */ | 2383 Now, try to find in which coding system the text is encoded. */ |
| 2380 if (c < 0x80) | 2384 if (c < 0x80) |
| 2381 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */ | 2385 { |
| 2382 /* C is an ISO2022 specific control code of C0. */ | 2386 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */ |
| 2383 mask = detect_coding_iso2022 (src, src_end); | 2387 /* C is an ISO2022 specific control code of C0. */ |
| 2384 | 2388 mask = detect_coding_iso2022 (src, src_end); |
| 2389 src++; | |
| 2390 if (mask == CODING_CATEGORY_MASK_ANY) | |
| 2391 /* No valid ISO2022 code follows C. Try again. */ | |
| 2392 goto label_loop_detect_coding; | |
| 2393 } | |
| 2385 else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI) | 2394 else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI) |
| 2386 /* C is an ISO2022 specific control code of C1, | 2395 /* C is an ISO2022 specific control code of C1, |
| 2387 or the first byte of SJIS's 2-byte character code, | 2396 or the first byte of SJIS's 2-byte character code, |
| 2388 or a leading code of Emacs. */ | 2397 or a leading code of Emacs. */ |
| 2389 mask = (detect_coding_iso2022 (src, src_end) | 2398 mask = (detect_coding_iso2022 (src, src_end) |
| 3223 "Encode a JISX0208 character CHAR to SJIS coding-system.\n\ | 3232 "Encode a JISX0208 character CHAR to SJIS coding-system.\n\ |
| 3224 Return the corresponding character code in SJIS.") | 3233 Return the corresponding character code in SJIS.") |
| 3225 (ch) | 3234 (ch) |
| 3226 Lisp_Object ch; | 3235 Lisp_Object ch; |
| 3227 { | 3236 { |
| 3228 int charset; | 3237 int charset, c1, c2, s1, s2; |
| 3229 unsigned char c1, c2, s1, s2; | |
| 3230 Lisp_Object val; | 3238 Lisp_Object val; |
| 3231 | 3239 |
| 3232 CHECK_NUMBER (ch, 0); | 3240 CHECK_NUMBER (ch, 0); |
| 3233 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); | 3241 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); |
| 3234 if (charset == charset_jisx0208) | 3242 if (charset == charset_jisx0208) |
| 3235 { | 3243 { |
| 3236 ENCODE_SJIS (c1, c2, s1, s2); | 3244 ENCODE_SJIS (c1, c2, s1, s2); |
| 3237 XSETFASTINT (val, ((int)s1 << 8) | s2); | 3245 XSETFASTINT (val, (s1 << 8) | s2); |
| 3238 } | 3246 } |
| 3239 else | 3247 else |
| 3240 XSETFASTINT (val, 0); | 3248 XSETFASTINT (val, 0); |
| 3241 return val; | 3249 return val; |
| 3242 } | 3250 } |
| 3263 "Encode the Big5 character CHAR to BIG5 coding-system.\n\ | 3271 "Encode the Big5 character CHAR to BIG5 coding-system.\n\ |
| 3264 Return the corresponding character code in Big5.") | 3272 Return the corresponding character code in Big5.") |
| 3265 (ch) | 3273 (ch) |
| 3266 Lisp_Object ch; | 3274 Lisp_Object ch; |
| 3267 { | 3275 { |
| 3268 int charset; | 3276 int charset, c1, c2, b1, b2; |
| 3269 unsigned char c1, c2, b1, b2; | |
| 3270 Lisp_Object val; | 3277 Lisp_Object val; |
| 3271 | 3278 |
| 3272 CHECK_NUMBER (ch, 0); | 3279 CHECK_NUMBER (ch, 0); |
| 3273 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); | 3280 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); |
| 3274 if (charset == charset_big5_1 || charset == charset_big5_2) | 3281 if (charset == charset_big5_1 || charset == charset_big5_2) |
| 3275 { | 3282 { |
| 3276 ENCODE_BIG5 (charset, c1, c2, b1, b2); | 3283 ENCODE_BIG5 (charset, c1, c2, b1, b2); |
| 3277 XSETFASTINT (val, ((int)b1 << 8) | b2); | 3284 XSETFASTINT (val, (b1 << 8) | b2); |
| 3278 } | 3285 } |
| 3279 else | 3286 else |
| 3280 XSETFASTINT (val, 0); | 3287 XSETFASTINT (val, 0); |
| 3281 return val; | 3288 return val; |
| 3282 } | 3289 } |
