comparison src/coding.c @ 28022:6c41f3276340

Add comments on coding-category-utf-8, coding-category-utf-16-be, and coding-category-utf-16-le. (coding_category_name): Include "coding-category-utf-8", "coding-category-utf-16-be", and "coding-category-utf-16-le". (UTF_8_1_OCTET_P) (UTF_8_EXTRA_OCTET_P) (UTF_8_2_OCTET_LEADING_P) (UTF_8_3_OCTET_LEADING_P) (UTF_8_4_OCTET_LEADING_P) (UTF_8_5_OCTET_LEADING_P) (UTF_8_6_OCTET_LEADING_P): New macros. (detect_coding_utf_8): New function. (UTF_16_INVALID_P) (UTF_16_HIGH_SURROGATE_P) (UTF_16_LOW_SURROGATE_P): New macros. (detect_coding_utf_16): New function. (detect_coding_mask): Fix bug of returning wrong mask bits in the case that detect_coding_XXX returns a mask not set in priorities[i]. (detect_eol_type_in_2_octet_form): New function. (detect_eol): If coding->category_idx is for UTF-16, call detect_eol_type_in_2_octet_form instead of detect_eol_type. (detect_coding_system): Don't include `nil' coding-system in the result. (Fupdate_coding_systems_internal): Update all coding-categories.
author Kenichi Handa <handa@m17n.org>
date Tue, 07 Mar 2000 06:17:54 +0000
parents c2e0998057f9
children 01292435daaf
comparison
equal deleted inserted replaced
28021:e34a172ee77e 28022:6c41f3276340
360 "coding-category-iso-8-2", 360 "coding-category-iso-8-2",
361 "coding-category-iso-7-else", 361 "coding-category-iso-7-else",
362 "coding-category-iso-8-else", 362 "coding-category-iso-8-else",
363 "coding-category-ccl", 363 "coding-category-ccl",
364 "coding-category-big5", 364 "coding-category-big5",
365 "coding-category-utf-8",
366 "coding-category-utf-16-be",
367 "coding-category-utf-16-le",
365 "coding-category-raw-text", 368 "coding-category-raw-text",
366 "coding-category-binary" 369 "coding-category-binary"
367 }; 370 };
368 371
369 /* Table of pointers to coding systems corresponding to each coding 372 /* Table of pointers to coding systems corresponding to each coding
2346 } 2349 }
2347 } 2350 }
2348 return CODING_CATEGORY_MASK_BIG5; 2351 return CODING_CATEGORY_MASK_BIG5;
2349 } 2352 }
2350 2353
2354 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2355 Check if a text is encoded in UTF-8. If it is, return
2356 CODING_CATEGORY_MASK_UTF_8, else return 0. */
2357
/* Predicates classifying one octet C of a UTF-8 byte sequence (the
   1- to 6-octet forms, cf. RFC2279).  Every multi-octet predicate
   compares the marker bits selected by MASK against MARKER.  */

#define UTF_8_MARKER_BITS_P(c, mask, marker) (((c) & (mask)) == (marker))

/* 0xxxxxxx: a complete one-octet character.  */
#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
/* 10xxxxxx: a trailing octet of a multi-octet sequence.  */
#define UTF_8_EXTRA_OCTET_P(c) UTF_8_MARKER_BITS_P (c, 0xC0, 0x80)
/* 110xxxxx: leading octet of a 2-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_MARKER_BITS_P (c, 0xE0, 0xC0)
/* 1110xxxx: leading octet of a 3-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_MARKER_BITS_P (c, 0xF0, 0xE0)
/* 11110xxx: leading octet of a 4-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_MARKER_BITS_P (c, 0xF8, 0xF0)
/* 111110xx: leading octet of a 5-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_MARKER_BITS_P (c, 0xFC, 0xF8)
/* 1111110x: leading octet of a 6-octet sequence.  */
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_MARKER_BITS_P (c, 0xFE, 0xFC)
2365
2366 int
2367 detect_coding_utf_8 (src, src_end)
2368 unsigned char *src, *src_end;
2369 {
2370 unsigned char c;
2371 int seq_maybe_bytes;
2372
2373 while (src < src_end)
2374 {
2375 c = *src++;
2376 if (UTF_8_1_OCTET_P (c))
2377 continue;
2378 else if (UTF_8_2_OCTET_LEADING_P (c))
2379 seq_maybe_bytes = 1;
2380 else if (UTF_8_3_OCTET_LEADING_P (c))
2381 seq_maybe_bytes = 2;
2382 else if (UTF_8_4_OCTET_LEADING_P (c))
2383 seq_maybe_bytes = 3;
2384 else if (UTF_8_5_OCTET_LEADING_P (c))
2385 seq_maybe_bytes = 4;
2386 else if (UTF_8_6_OCTET_LEADING_P (c))
2387 seq_maybe_bytes = 5;
2388 else
2389 return 0;
2390
2391 do
2392 {
2393 if (src >= src_end)
2394 return CODING_CATEGORY_MASK_UTF_8;
2395
2396 c = *src++;
2397 if (!UTF_8_EXTRA_OCTET_P (c))
2398 return 0;
2399 seq_maybe_bytes--;
2400 }
2401 while (seq_maybe_bytes > 0);
2402 }
2403
2404 return CODING_CATEGORY_MASK_UTF_8;
2405 }
2406
2407 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2408 Check if a text starts with a UTF-16 signature. If the first two
2409 bytes are FE FF (big endian order), return
2410 CODING_CATEGORY_MASK_UTF_16_BE; if they are FF FE (little endian
2411 order), return CODING_CATEGORY_MASK_UTF_16_LE; else return 0. */
2412
/* Predicates on a 16-bit UTF-16 code unit VAL.  */

/* VAL is one of the two values 0xFFFE and 0xFFFF.  */
#define UTF_16_INVALID_P(val) \
  (((val) == 0xFFFE) \
   || ((val) == 0xFFFF))

/* VAL is a high (leading) surrogate, 0xD800..0xDBFF.  The top six
   bits must be compared: masking with 0xD800 alone would also accept
   the low surrogates and every value from 0xF800 up.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  The top six
   bits must be compared: masking with 0xDC00 alone would also accept
   every value from 0xFC00 up.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2422
2423 int
2424 detect_coding_utf_16 (src, src_end)
2425 unsigned char *src, *src_end;
2426 {
2427 if ((src + 1) >= src_end) return 0;
2428
2429 if ((src[0] == 0xFF) && (src[1] == 0xFE))
2430 return CODING_CATEGORY_MASK_UTF_16_LE;
2431 else if ((src[0] == 0xFE) && (src[1] == 0xFF))
2432 return CODING_CATEGORY_MASK_UTF_16_BE;
2433
2434 return 0;
2435 }
2436
2351 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". 2437 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2352 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */ 2438 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
2353 2439
2354 int 2440 int
2355 decode_coding_sjis_big5 (coding, source, destination, 2441 decode_coding_sjis_big5 (coding, source, destination,
3451 3537
3452 The category for a coding system which has the same code range 3538 The category for a coding system which has the same code range
3453 as BIG5. Assigned the coding-system (Lisp symbol) 3539 as BIG5. Assigned the coding-system (Lisp symbol)
3454 `cn-big5' by default. 3540 `cn-big5' by default.
3455 3541
3542 o coding-category-utf-8
3543
3544 The category for a coding system which has the same code range
3545 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp
3546 symbol) `utf-8' by default.
3547
3548 o coding-category-utf-16-be
3549
3550 The category for a coding system in which a text has a
3551 Unicode signature (cf. Unicode Standard) in the order of BIG
3552 endian at the head. Assigned the coding-system (Lisp symbol)
3553 `utf-16-be' by default.
3554
3555 o coding-category-utf-16-le
3556
3557 The category for a coding system in which a text has a
3558 Unicode signature (cf. Unicode Standard) in the order of
3559 LITTLE endian at the head. Assigned the coding-system (Lisp
3560 symbol) `utf-16-le' by default.
3561
3456 o coding-category-ccl 3562 o coding-category-ccl
3457 3563
3458 The category for a coding system of which encoder/decoder is 3564 The category for a coding system of which encoder/decoder is
3459 written in CCL programs. The default value is nil, i.e., no 3565 written in CCL programs. The default value is nil, i.e., no
3460 coding system is assigned. 3566 coding system is assigned.
3479 int ascii_skip_code[256]; 3585 int ascii_skip_code[256];
3480 3586
3481 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded. 3587 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3482 If it detects possible coding systems, return an integer in which 3588 If it detects possible coding systems, return an integer in which
3483 appropriate flag bits are set. Flag bits are defined by macros 3589 appropriate flag bits are set. Flag bits are defined by macros
3484 CODING_CATEGORY_MASK_XXX in `coding.h'. 3590 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
3591 it should point to the table `coding_priorities'. In that case, only
3592 the flag bit for a coding system of the highest priority is set in
3593 the returned value.
3485 3594
3486 How many ASCII characters are at the head is returned as *SKIP. */ 3595 How many ASCII characters are at the head is returned as *SKIP. */
3487 3596
3488 static int 3597 static int
3489 detect_coding_mask (source, src_bytes, priorities, skip) 3598 detect_coding_mask (source, src_bytes, priorities, skip)
3490 unsigned char *source; 3599 unsigned char *source;
3491 int src_bytes, *priorities, *skip; 3600 int src_bytes, *priorities, *skip;
3492 { 3601 {
3493 register unsigned char c; 3602 register unsigned char c;
3494 unsigned char *src = source, *src_end = source + src_bytes; 3603 unsigned char *src = source, *src_end = source + src_bytes;
3495 unsigned int mask; 3604 unsigned int mask, utf16_examined_p, iso2022_examined_p;
3496 int i; 3605 int i, idx;
3497 3606
3498 /* At first, skip all ASCII characters and control characters except 3607 /* At first, skip all ASCII characters and control characters except
3499 for three ISO2022 specific control characters. */ 3608 for three ISO2022 specific control characters. */
3500 ascii_skip_code[ISO_CODE_SO] = 0; 3609 ascii_skip_code[ISO_CODE_SO] = 0;
3501 ascii_skip_code[ISO_CODE_SI] = 0; 3610 ascii_skip_code[ISO_CODE_SI] = 0;
3526 else 3635 else
3527 ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1; 3636 ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3528 goto label_loop_detect_coding; 3637 goto label_loop_detect_coding;
3529 } 3638 }
3530 if (priorities) 3639 if (priorities)
3531 goto label_return_highest_only; 3640 {
3641 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3642 {
3643 if (mask & priorities[i])
3644 return priorities[i];
3645 }
3646 return CODING_CATEGORY_MASK_RAW_TEXT;
3647 }
3532 } 3648 }
3533 else 3649 else
3534 { 3650 {
3535 int try; 3651 int try;
3536 3652
3537 if (c < 0xA0) 3653 if (c < 0xA0)
3538 { 3654 {
3539 /* C is the first byte of SJIS character code, 3655 /* C is the first byte of SJIS character code,
3540 or a leading-code of Emacs' internal format (emacs-mule). */ 3656 or a leading-code of Emacs' internal format (emacs-mule),
3541 try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE; 3657 or the first byte of UTF-16. */
3658 try = (CODING_CATEGORY_MASK_SJIS
3659 | CODING_CATEGORY_MASK_EMACS_MULE
3660 | CODING_CATEGORY_MASK_UTF_16_BE
3661 | CODING_CATEGORY_MASK_UTF_16_LE);
3542 3662
3543 /* Or, if C is a special latin extra code, 3663 /* Or, if C is a special latin extra code,
3544 or is an ISO2022 specific control code of C1 (SS2 or SS3), 3664 or is an ISO2022 specific control code of C1 (SS2 or SS3),
3545 or is an ISO2022 control-sequence-introducer (CSI), 3665 or is an ISO2022 control-sequence-introducer (CSI),
3546 we should also consider the possibility of ISO2022 codings. */ 3666 we should also consider the possibility of ISO2022 codings. */
3557 | CODING_CATEGORY_MASK_ISO_8BIT); 3677 | CODING_CATEGORY_MASK_ISO_8BIT);
3558 } 3678 }
3559 else 3679 else
3560 /* C is a character of ISO2022 in graphic plane right, 3680 /* C is a character of ISO2022 in graphic plane right,
3561 or a SJIS's 1-byte character code (i.e. JISX0201), 3681 or a SJIS's 1-byte character code (i.e. JISX0201),
3562 or the first byte of BIG5's 2-byte code. */ 3682 or the first byte of BIG5's 2-byte code,
3683 or the first byte of UTF-8/16. */
3563 try = (CODING_CATEGORY_MASK_ISO_8_ELSE 3684 try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3564 | CODING_CATEGORY_MASK_ISO_8BIT 3685 | CODING_CATEGORY_MASK_ISO_8BIT
3565 | CODING_CATEGORY_MASK_SJIS 3686 | CODING_CATEGORY_MASK_SJIS
3566 | CODING_CATEGORY_MASK_BIG5); 3687 | CODING_CATEGORY_MASK_BIG5
3688 | CODING_CATEGORY_MASK_UTF_8
3689 | CODING_CATEGORY_MASK_UTF_16_BE
3690 | CODING_CATEGORY_MASK_UTF_16_LE);
3567 3691
3568 /* Or, we may have to consider the possibility of CCL. */ 3692 /* Or, we may have to consider the possibility of CCL. */
3569 if (coding_system_table[CODING_CATEGORY_IDX_CCL] 3693 if (coding_system_table[CODING_CATEGORY_IDX_CCL]
3570 && (coding_system_table[CODING_CATEGORY_IDX_CCL] 3694 && (coding_system_table[CODING_CATEGORY_IDX_CCL]
3571 ->spec.ccl.valid_codes)[c]) 3695 ->spec.ccl.valid_codes)[c])
3572 try |= CODING_CATEGORY_MASK_CCL; 3696 try |= CODING_CATEGORY_MASK_CCL;
3573 3697
3574 mask = 0; 3698 mask = 0;
3699 utf16_examined_p = iso2022_examined_p = 0;
3575 if (priorities) 3700 if (priorities)
3576 { 3701 {
3577 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++) 3702 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3578 { 3703 {
3579 if (priorities[i] & try & CODING_CATEGORY_MASK_ISO) 3704 if (!iso2022_examined_p
3580 mask = detect_coding_iso2022 (src, src_end); 3705 && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
3706 {
3707 mask |= detect_coding_iso2022 (src, src_end);
3708 iso2022_examined_p = 1;
3709 }
3581 else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS) 3710 else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3582 mask = detect_coding_sjis (src, src_end); 3711 mask |= detect_coding_sjis (src, src_end);
3712 else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3713 mask |= detect_coding_utf_8 (src, src_end);
3714 else if (!utf16_examined_p
3715 && (priorities[i] & try &
3716 CODING_CATEGORY_MASK_UTF_16_BE_LE))
3717 {
3718 mask |= detect_coding_utf_16 (src, src_end);
3719 utf16_examined_p = 1;
3720 }
3583 else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5) 3721 else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3584 mask = detect_coding_big5 (src, src_end); 3722 mask |= detect_coding_big5 (src, src_end);
3585 else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) 3723 else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3586 mask = detect_coding_emacs_mule (src, src_end); 3724 mask |= detect_coding_emacs_mule (src, src_end);
3587 else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL) 3725 else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3588 mask = detect_coding_ccl (src, src_end); 3726 mask |= detect_coding_ccl (src, src_end);
3589 else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) 3727 else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3590 mask = CODING_CATEGORY_MASK_RAW_TEXT; 3728 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3591 else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) 3729 else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3592 mask = CODING_CATEGORY_MASK_BINARY; 3730 mask |= CODING_CATEGORY_MASK_BINARY;
3593 if (mask) 3731 if (mask & priorities[i])
3594 goto label_return_highest_only; 3732 return priorities[i];
3595 } 3733 }
3596 return CODING_CATEGORY_MASK_RAW_TEXT; 3734 return CODING_CATEGORY_MASK_RAW_TEXT;
3597 } 3735 }
3598 if (try & CODING_CATEGORY_MASK_ISO) 3736 if (try & CODING_CATEGORY_MASK_ISO)
3599 mask |= detect_coding_iso2022 (src, src_end); 3737 mask |= detect_coding_iso2022 (src, src_end);
3600 if (try & CODING_CATEGORY_MASK_SJIS) 3738 if (try & CODING_CATEGORY_MASK_SJIS)
3601 mask |= detect_coding_sjis (src, src_end); 3739 mask |= detect_coding_sjis (src, src_end);
3602 if (try & CODING_CATEGORY_MASK_BIG5) 3740 if (try & CODING_CATEGORY_MASK_BIG5)
3603 mask |= detect_coding_big5 (src, src_end); 3741 mask |= detect_coding_big5 (src, src_end);
3742 if (try & CODING_CATEGORY_MASK_UTF_8)
3743 mask |= detect_coding_utf_8 (src, src_end);
3744 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3745 mask |= detect_coding_utf_16 (src, src_end);
3604 if (try & CODING_CATEGORY_MASK_EMACS_MULE) 3746 if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3605 mask |= detect_coding_emacs_mule (src, src_end); 3747 mask |= detect_coding_emacs_mule (src, src_end);
3606 if (try & CODING_CATEGORY_MASK_CCL) 3748 if (try & CODING_CATEGORY_MASK_CCL)
3607 mask |= detect_coding_ccl (src, src_end); 3749 mask |= detect_coding_ccl (src, src_end);
3608 } 3750 }
3609 return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); 3751 return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3610
3611 label_return_highest_only:
3612 for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3613 {
3614 if (mask & priorities[i])
3615 return priorities[i];
3616 }
3617 return CODING_CATEGORY_MASK_RAW_TEXT;
3618 } 3752 }
3619 3753
3620 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded. 3754 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3621 The information of the detected coding system is set in CODING. */ 3755 The information of the detected coding system is set in CODING. */
3622 3756
3708 if (*skip == 0) 3842 if (*skip == 0)
3709 *skip = src_end - source; 3843 *skip = src_end - source;
3710 return eol_type; 3844 return eol_type;
3711 } 3845 }
3712 3846
3847 /* Like detect_eol_type, but detect EOL type in 2-octet
3848 big-endian/little-endian format for coding systems utf-16-be and
3849 utf-16-le. */
3850
3851 static int
3852 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
3853 unsigned char *source;
3854 int src_bytes, *skip;
3855 {
3856 unsigned char *src = source, *src_end = src + src_bytes;
3857 unsigned int c1, c2;
3858 int total = 0; /* How many end-of-lines are found so far. */
3859 int eol_type = CODING_EOL_UNDECIDED;
3860 int this_eol_type;
3861 int msb, lsb;
3862
3863 if (big_endian_p)
3864 msb = 0, lsb = 1;
3865 else
3866 msb = 1, lsb = 0;
3867
3868 *skip = 0;
3869
3870 while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
3871 {
3872 c1 = (src[msb] << 8) | (src[lsb]);
3873 src += 2;
3874
3875 if (c1 == '\n' || c1 == '\r')
3876 {
3877 if (*skip == 0)
3878 *skip = src - 2 - source;
3879 total++;
3880 if (c1 == '\n')
3881 {
3882 this_eol_type = CODING_EOL_LF;
3883 }
3884 else
3885 {
3886 if ((src + 1) >= src_end)
3887 {
3888 this_eol_type = CODING_EOL_CR;
3889 }
3890 else
3891 {
3892 c2 = (src[msb] << 8) | (src[lsb]);
3893 if (c2 == '\n')
3894 this_eol_type = CODING_EOL_CRLF, src += 2;
3895 else
3896 this_eol_type = CODING_EOL_CR;
3897 }
3898 }
3899
3900 if (eol_type == CODING_EOL_UNDECIDED)
3901 /* This is the first end-of-line. */
3902 eol_type = this_eol_type;
3903 else if (eol_type != this_eol_type)
3904 {
3905 /* The found type is different from what found before. */
3906 eol_type = CODING_EOL_INCONSISTENT;
3907 break;
3908 }
3909 }
3910 }
3911
3912 if (*skip == 0)
3913 *skip = src_end - source;
3914 return eol_type;
3915 }
3916
3713 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC 3917 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3714 is encoded. If it detects an appropriate format of end-of-line, it 3918 is encoded. If it detects an appropriate format of end-of-line, it
3715 sets the information in *CODING. */ 3919 sets the information in *CODING. */
3716 3920
3717 void 3921 void
3720 unsigned char *src; 3924 unsigned char *src;
3721 int src_bytes; 3925 int src_bytes;
3722 { 3926 {
3723 Lisp_Object val; 3927 Lisp_Object val;
3724 int skip; 3928 int skip;
3725 int eol_type = detect_eol_type (src, src_bytes, &skip); 3929 int eol_type;
3930
3931 switch (coding->category_idx)
3932 {
3933 case CODING_CATEGORY_IDX_UTF_16_BE:
3934 eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
3935 break;
3936 case CODING_CATEGORY_IDX_UTF_16_LE:
3937 eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
3938 break;
3939 default:
3940 eol_type = detect_eol_type (src, src_bytes, &skip);
3941 break;
3942 }
3726 3943
3727 if (coding->heading_ascii > skip) 3944 if (coding->heading_ascii > skip)
3728 coding->heading_ascii = skip; 3945 coding->heading_ascii = skip;
3729 else 3946 else
3730 skip = coding->heading_ascii; 3947 skip = coding->heading_ascii;
5214 return (highest ? val : Fcons (val, Qnil)); 5431 return (highest ? val : Fcons (val, Qnil));
5215 } 5432 }
5216 5433
5217 /* At first, gather possible coding systems in VAL. */ 5434 /* At first, gather possible coding systems in VAL. */
5218 val = Qnil; 5435 val = Qnil;
5219 for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCDR (tmp)) 5436 for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
5220 { 5437 {
5221 int idx 5438 Lisp_Object category_val, category_index;
5222 = XFASTINT (Fget (XCAR (tmp), Qcoding_category_index)); 5439
5223 if (coding_mask & (1 << idx)) 5440 category_index = Fget (XCAR (tmp), Qcoding_category_index);
5224 { 5441 category_val = Fsymbol_value (XCAR (tmp));
5225 val = Fcons (Fsymbol_value (XCAR (tmp)), val); 5442 if (!NILP (category_val)
5443 && NATNUMP (category_index)
5444 && (coding_mask & (1 << XFASTINT (category_index))))
5445 {
5446 val = Fcons (category_val, val);
5226 if (highest) 5447 if (highest)
5227 break; 5448 break;
5228 } 5449 }
5229 } 5450 }
5230 if (!highest) 5451 if (!highest)
5231 val = Fnreverse (val); 5452 val = Fnreverse (val);
5232 5453
5233 /* Then, replace the elements with subsidiary coding systems. */ 5454 /* Then, replace the elements with subsidiary coding systems. */
5234 for (tmp = val; !NILP (tmp); tmp = XCDR (tmp)) 5455 for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
5235 { 5456 {
5236 if (eol_type != CODING_EOL_UNDECIDED 5457 if (eol_type != CODING_EOL_UNDECIDED
5237 && eol_type != CODING_EOL_INCONSISTENT) 5458 && eol_type != CODING_EOL_INCONSISTENT)
5238 { 5459 {
5239 Lisp_Object eol; 5460 Lisp_Object eol;
5710 } 5931 }
5711 5932
5712 DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal, 5933 DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal,
5713 Supdate_coding_systems_internal, 0, 0, 0, 5934 Supdate_coding_systems_internal, 0, 0, 0,
5714 "Update internal database for ISO2022 and CCL based coding systems.\n\ 5935 "Update internal database for ISO2022 and CCL based coding systems.\n\
5715 When values of the following coding categories are changed, you must\n\ 5936 When values of any coding categories are changed, you must\n\
5716 call this function:\n\ 5937 call this function")
5717 coding-category-iso-7, coding-category-iso-7-tight,\n\
5718 coding-category-iso-8-1, coding-category-iso-8-2,\n\
5719 coding-category-iso-7-else, coding-category-iso-8-else,\n\
5720 coding-category-ccl")
5721 () 5938 ()
5722 { 5939 {
5723 int i; 5940 int i;
5724 5941
5725 for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++) 5942 for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
5726 { 5943 {
5727 Lisp_Object val; 5944 Lisp_Object val;
5728 5945
5729 val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value; 5946 val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value;
5730 if (!NILP (val)) 5947 if (!NILP (val))
5765 coding_priorities[i++] = (1 << idx); 5982 coding_priorities[i++] = (1 << idx);
5766 val = XCDR (val); 5983 val = XCDR (val);
5767 } 5984 }
5768 /* If coding-category-list is valid and contains all coding 5985 /* If coding-category-list is valid and contains all coding
5769 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not, 5986 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
5770 the following code saves Emacs from craching. */ 5987 the following code saves Emacs from crashing. */
5771 while (i < CODING_CATEGORY_IDX_MAX) 5988 while (i < CODING_CATEGORY_IDX_MAX)
5772 coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT; 5989 coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5773 5990
5774 return Qnil; 5991 return Qnil;
5775 } 5992 }