comparison src/coding.c @ 23088:45c36d636f66

(detect_coding_iso2022): Don't check the byte length of succeeding codes (0xa0..0xFF) if the codes follow a single shift code.
author Kenichi Handa <handa@m17n.org>
date Mon, 24 Aug 1998 06:42:56 +0000
parents 910740dcedb5
children 20486aa49e7d
comparison
equal deleted inserted replaced
23087:4ae12d3c8c30 23088:45c36d636f66
681 detect_coding_iso2022 (src, src_end) 681 detect_coding_iso2022 (src, src_end)
682 unsigned char *src, *src_end; 682 unsigned char *src, *src_end;
683 { 683 {
684 int mask = CODING_CATEGORY_MASK_ISO; 684 int mask = CODING_CATEGORY_MASK_ISO;
685 int mask_found = 0; 685 int mask_found = 0;
686 int reg[4], shift_out = 0; 686 int reg[4], shift_out = 0, single_shifting = 0;
687 int c, c1, i, charset; 687 int c, c1, i, charset;
688 688
689 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 689 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
690 while (mask && src < src_end) 690 while (mask && src < src_end)
691 { 691 {
692 c = *src++; 692 c = *src++;
693 switch (c) 693 switch (c)
694 { 694 {
695 case ISO_CODE_ESC: 695 case ISO_CODE_ESC:
696 single_shifting = 0;
696 if (src >= src_end) 697 if (src >= src_end)
697 break; 698 break;
698 c = *src++; 699 c = *src++;
699 if (c >= '(' && c <= '/') 700 if (c >= '(' && c <= '/')
700 { 701 {
779 if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset)) 780 if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
780 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; 781 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
781 break; 782 break;
782 783
783 case ISO_CODE_SO: 784 case ISO_CODE_SO:
785 single_shifting = 0;
784 if (shift_out == 0 786 if (shift_out == 0
785 && (reg[1] >= 0 787 && (reg[1] >= 0
786 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE) 788 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
787 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))) 789 || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
788 { 790 {
791 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; 793 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
792 } 794 }
793 break; 795 break;
794 796
795 case ISO_CODE_SI: 797 case ISO_CODE_SI:
798 single_shifting = 0;
796 if (shift_out == 1) 799 if (shift_out == 1)
797 { 800 {
798 /* Locking shift in. */ 801 /* Locking shift in. */
799 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; 802 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
800 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; 803 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
801 } 804 }
802 break; 805 break;
803 806
804 case ISO_CODE_CSI: 807 case ISO_CODE_CSI:
808 single_shifting = 0;
805 case ISO_CODE_SS2: 809 case ISO_CODE_SS2:
806 case ISO_CODE_SS3: 810 case ISO_CODE_SS3:
807 { 811 {
808 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; 812 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
809 813
813 & CODING_FLAG_ISO_SINGLE_SHIFT) 817 & CODING_FLAG_ISO_SINGLE_SHIFT)
814 newmask |= CODING_CATEGORY_MASK_ISO_8_1; 818 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
815 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags 819 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
816 & CODING_FLAG_ISO_SINGLE_SHIFT) 820 & CODING_FLAG_ISO_SINGLE_SHIFT)
817 newmask |= CODING_CATEGORY_MASK_ISO_8_2; 821 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
822 single_shifting = 1;
818 } 823 }
819 if (VECTORP (Vlatin_extra_code_table) 824 if (VECTORP (Vlatin_extra_code_table)
820 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) 825 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
821 { 826 {
822 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags 827 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
831 } 836 }
832 break; 837 break;
833 838
834 default: 839 default:
835 if (c < 0x80) 840 if (c < 0x80)
836 break; 841 {
842 single_shifting = 0;
843 break;
844 }
837 else if (c < 0xA0) 845 else if (c < 0xA0)
838 { 846 {
847 single_shifting = 0;
839 if (VECTORP (Vlatin_extra_code_table) 848 if (VECTORP (Vlatin_extra_code_table)
840 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) 849 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
841 { 850 {
842 int newmask = 0; 851 int newmask = 0;
843 852
858 unsigned char *src_begin = src; 867 unsigned char *src_begin = src;
859 868
860 mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT 869 mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
861 | CODING_CATEGORY_MASK_ISO_7_ELSE); 870 | CODING_CATEGORY_MASK_ISO_7_ELSE);
862 mask_found |= CODING_CATEGORY_MASK_ISO_8_1; 871 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
863 while (src < src_end && *src >= 0xA0) 872 /* Check the length of succeeding codes of the range
864 src++; 873 0xA0..0xFF. If the byte length is odd, we exclude
865 if ((src - src_begin - 1) & 1 && src < src_end) 874 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
866 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; 875 when we are not single shifting. */
867 else 876 if (!single_shifting)
868 mask_found |= CODING_CATEGORY_MASK_ISO_8_2; 877 {
878 while (src < src_end && *src >= 0xA0)
879 src++;
880 if ((src - src_begin - 1) & 1 && src < src_end)
881 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
882 else
883 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
884 }
869 } 885 }
870 break; 886 break;
871 } 887 }
872 } 888 }
873 889