comparison src/coding.c @ 90651:23c1467f8640

(detect_coding_charset): Fix detection of multi-byte charset.
author Kenichi Handa <handa@m17n.org>
date Wed, 08 Nov 2006 04:28:29 +0000
parents c358d0861b16
children dbe3f29e61d6
comparison
equal deleted inserted replaced
90650:02cf29720f31 90651:23c1467f8640
4729 const unsigned char *src_end = coding->source + coding->src_bytes; 4729 const unsigned char *src_end = coding->source + coding->src_bytes;
4730 int multibytep = coding->src_multibyte; 4730 int multibytep = coding->src_multibyte;
4731 int consumed_chars = 0; 4731 int consumed_chars = 0;
4732 Lisp_Object attrs, valids; 4732 Lisp_Object attrs, valids;
4733 int found = 0; 4733 int found = 0;
4734 int head_ascii = coding->head_ascii;
4734 4735
4735 detect_info->checked |= CATEGORY_MASK_CHARSET; 4736 detect_info->checked |= CATEGORY_MASK_CHARSET;
4736 4737
4737 coding = &coding_categories[coding_category_charset]; 4738 coding = &coding_categories[coding_category_charset];
4738 attrs = CODING_ID_ATTRS (coding->id); 4739 attrs = CODING_ID_ATTRS (coding->id);
4739 valids = AREF (attrs, coding_attr_charset_valids); 4740 valids = AREF (attrs, coding_attr_charset_valids);
4740 4741
4741 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) 4742 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4742 src += coding->head_ascii; 4743 src += head_ascii;
4743 4744
4744 while (1) 4745 while (1)
4745 { 4746 {
4746 int c; 4747 int c;
4748 Lisp_Object val;
4749 struct charset *charset;
4750 int dim, idx;
4747 4751
4748 src_base = src; 4752 src_base = src;
4749 ONE_MORE_BYTE (c); 4753 ONE_MORE_BYTE (c);
4750 if (c < 0) 4754 if (c < 0)
4751 continue; 4755 continue;
4752 if (NILP (AREF (valids, c))) 4756 val = AREF (valids, c);
4757 if (NILP (val))
4753 break; 4758 break;
4754 if (c >= 0x80) 4759 if (c >= 0x80)
4755 found = CATEGORY_MASK_CHARSET; 4760 found = CATEGORY_MASK_CHARSET;
4756 } 4761 if (INTEGERP (val))
4762 {
4763 charset = CHARSET_FROM_ID (XFASTINT (val));
4764 dim = CHARSET_DIMENSION (charset);
4765 for (idx = 1; idx < dim; idx++)
4766 {
4767 if (src == src_end)
4768 goto too_short;
4769 ONE_MORE_BYTE (c);
4770 if (c < charset->code_space[(dim - 1 - idx) * 2]
4771 || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4772 break;
4773 }
4774 if (idx < dim)
4775 break;
4776 }
4777 else
4778 {
4779 idx = 1;
4780 for (; CONSP (val); val = XCDR (val))
4781 {
4782 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4783 dim = CHARSET_DIMENSION (charset);
4784 while (idx < dim)
4785 {
4786 if (src == src_end)
4787 goto too_short;
4788 ONE_MORE_BYTE (c);
4789 if (c < charset->code_space[(dim - 1 - idx) * 4]
4790 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4791 break;
4792 idx++;
4793 }
4794 if (idx == dim)
4795 {
4796 val = Qnil;
4797 break;
4798 }
4799 }
4800 if (CONSP (val))
4801 break;
4802 }
4803 }
4804 too_short:
4757 detect_info->rejected |= CATEGORY_MASK_CHARSET; 4805 detect_info->rejected |= CATEGORY_MASK_CHARSET;
4758 return 0; 4806 return 0;
4759 4807
4760 no_more_source: 4808 no_more_source:
4761 detect_info->found |= found; 4809 detect_info->found |= found;