comparison src/coding.c @ 17320:9d15bec5f47e

(detect_coding_iso2022, detect_coding_mask): Ignore invalid ESC sequences for ISO 2022. (Fencode_sjis_char, Fencode_big5_char): Adjust for the change to SPLIT_CHAR.
author Kenichi Handa <handa@m17n.org>
date Mon, 07 Apr 1997 07:12:13 +0000
parents ecacd8936926
children 265aa4625114
comparison
equal deleted inserted replaced
17319:a58d6ceeb370 17320:9d15bec5f47e
579 579
580 int 580 int
581 detect_coding_iso2022 (src, src_end) 581 detect_coding_iso2022 (src, src_end)
582 unsigned char *src, *src_end; 582 unsigned char *src, *src_end;
583 { 583 {
584 unsigned char c, g1 = 0; 584 int mask = CODING_CATEGORY_MASK_ANY;
585 int mask = (CODING_CATEGORY_MASK_ISO_7 585 int g1 = 0; /* 1 iff designating to G1. */
586 | CODING_CATEGORY_MASK_ISO_8_1 586 int c, i;
587 | CODING_CATEGORY_MASK_ISO_8_2);
588 /* We may look ahead at most 4 bytes. */
589 unsigned char *adjusted_src_end = src_end - 4;
590 int i;
591 587
592 while (src < src_end) 588 while (src < src_end)
593 { 589 {
594 c = *src++; 590 c = *src++;
595 switch (c) 591 switch (c)
596 { 592 {
597 case ISO_CODE_ESC: 593 case ISO_CODE_ESC:
598 if (src >= src_end) 594 if (src >= src_end)
599 break; 595 break;
600 c = *src++; 596 c = *src++;
601 if (src + 2 >= src_end 597 if (src < src_end
602 && ((c >= '(' && c <= '/') 598 && ((c >= '(' && c <= '/')
603 || c == '$' && ((*src >= '(' && *src <= '/') 599 || c == '$' && ((*src >= '(' && *src <= '/')
604 || (*src >= '@' && *src <= 'B')))) 600 || (*src >= '@' && *src <= 'B'))))
605 { 601 {
606 /* Valid designation sequence. */ 602 /* Valid designation sequence. */
603 mask &= (CODING_CATEGORY_MASK_ISO_7
604 | CODING_CATEGORY_MASK_ISO_8_1
605 | CODING_CATEGORY_MASK_ISO_8_2
606 | CODING_CATEGORY_MASK_ISO_ELSE);
607 if (c == ')' || (c == '$' && *src == ')')) 607 if (c == ')' || (c == '$' && *src == ')'))
608 g1 = 1; 608 {
609 g1 = 1;
610 mask &= ~CODING_CATEGORY_MASK_ISO_7;
611 }
609 src++; 612 src++;
610 break; 613 break;
611 } 614 }
612 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') 615 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
613 return CODING_CATEGORY_MASK_ISO_ELSE; 616 return CODING_CATEGORY_MASK_ISO_ELSE;
2360 unsigned char *src_end = src + src_bytes; 2363 unsigned char *src_end = src + src_bytes;
2361 int mask; 2364 int mask;
2362 2365
2363 /* At first, skip all ASCII characters and control characters except 2366 /* At first, skip all ASCII characters and control characters except
2364 for three ISO2022 specific control characters. */ 2367 for three ISO2022 specific control characters. */
2368 label_loop_detect_coding:
2365 while (src < src_end) 2369 while (src < src_end)
2366 { 2370 {
2367 c = *src; 2371 c = *src;
2368 if (c >= 0x80 2372 if (c >= 0x80
2369 || (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)) 2373 || (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2376 return CODING_CATEGORY_MASK_ANY; 2380 return CODING_CATEGORY_MASK_ANY;
2377 2381
2378 /* The text seems to be encoded in some multilingual coding system. 2382 /* The text seems to be encoded in some multilingual coding system.
2379 Now, try to find in which coding system the text is encoded. */ 2383 Now, try to find in which coding system the text is encoded. */
2380 if (c < 0x80) 2384 if (c < 0x80)
2381 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */ 2385 {
2382 /* C is an ISO2022 specific control code of C0. */ 2386 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
2383 mask = detect_coding_iso2022 (src, src_end); 2387 /* C is an ISO2022 specific control code of C0. */
2384 2388 mask = detect_coding_iso2022 (src, src_end);
2389 src++;
2390 if (mask == CODING_CATEGORY_MASK_ANY)
2391 /* No valid ISO2022 code follows C. Try again. */
2392 goto label_loop_detect_coding;
2393 }
2385 else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI) 2394 else if (c == ISO_CODE_SS2 || c == ISO_CODE_SS3 || c == ISO_CODE_CSI)
2386 /* C is an ISO2022 specific control code of C1, 2395 /* C is an ISO2022 specific control code of C1,
2387 or the first byte of SJIS's 2-byte character code, 2396 or the first byte of SJIS's 2-byte character code,
2388 or a leading code of Emacs. */ 2397 or a leading code of Emacs. */
2389 mask = (detect_coding_iso2022 (src, src_end) 2398 mask = (detect_coding_iso2022 (src, src_end)
3223 "Encode a JISX0208 character CHAR to SJIS coding-system.\n\ 3232 "Encode a JISX0208 character CHAR to SJIS coding-system.\n\
3224 Return the corresponding character code in SJIS.") 3233 Return the corresponding character code in SJIS.")
3225 (ch) 3234 (ch)
3226 Lisp_Object ch; 3235 Lisp_Object ch;
3227 { 3236 {
3228 int charset; 3237 int charset, c1, c2, s1, s2;
3229 unsigned char c1, c2, s1, s2;
3230 Lisp_Object val; 3238 Lisp_Object val;
3231 3239
3232 CHECK_NUMBER (ch, 0); 3240 CHECK_NUMBER (ch, 0);
3233 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); 3241 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
3234 if (charset == charset_jisx0208) 3242 if (charset == charset_jisx0208)
3235 { 3243 {
3236 ENCODE_SJIS (c1, c2, s1, s2); 3244 ENCODE_SJIS (c1, c2, s1, s2);
3237 XSETFASTINT (val, ((int)s1 << 8) | s2); 3245 XSETFASTINT (val, (s1 << 8) | s2);
3238 } 3246 }
3239 else 3247 else
3240 XSETFASTINT (val, 0); 3248 XSETFASTINT (val, 0);
3241 return val; 3249 return val;
3242 } 3250 }
3263 "Encode the Big5 character CHAR to BIG5 coding-system.\n\ 3271 "Encode the Big5 character CHAR to BIG5 coding-system.\n\
3264 Return the corresponding character code in Big5.") 3272 Return the corresponding character code in Big5.")
3265 (ch) 3273 (ch)
3266 Lisp_Object ch; 3274 Lisp_Object ch;
3267 { 3275 {
3268 int charset; 3276 int charset, c1, c2, b1, b2;
3269 unsigned char c1, c2, b1, b2;
3270 Lisp_Object val; 3277 Lisp_Object val;
3271 3278
3272 CHECK_NUMBER (ch, 0); 3279 CHECK_NUMBER (ch, 0);
3273 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); 3280 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
3274 if (charset == charset_big5_1 || charset == charset_big5_2) 3281 if (charset == charset_big5_1 || charset == charset_big5_2)
3275 { 3282 {
3276 ENCODE_BIG5 (charset, c1, c2, b1, b2); 3283 ENCODE_BIG5 (charset, c1, c2, b1, b2);
3277 XSETFASTINT (val, ((int)b1 << 8) | b2); 3284 XSETFASTINT (val, (b1 << 8) | b2);
3278 } 3285 }
3279 else 3286 else
3280 XSETFASTINT (val, 0); 3287 XSETFASTINT (val, 0);
3281 return val; 3288 return val;
3282 } 3289 }