Mercurial > emacs
comparison src/coding.c @ 90651:23c1467f8640
(detect_coding_charset): Fix detection of multi-byte charset.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Wed, 08 Nov 2006 04:28:29 +0000 |
| parents | c358d0861b16 |
| children | dbe3f29e61d6 |
comparison
equal
deleted
inserted
replaced
| 90650:02cf29720f31 | 90651:23c1467f8640 |
|---|---|
| 4729 const unsigned char *src_end = coding->source + coding->src_bytes; | 4729 const unsigned char *src_end = coding->source + coding->src_bytes; |
| 4730 int multibytep = coding->src_multibyte; | 4730 int multibytep = coding->src_multibyte; |
| 4731 int consumed_chars = 0; | 4731 int consumed_chars = 0; |
| 4732 Lisp_Object attrs, valids; | 4732 Lisp_Object attrs, valids; |
| 4733 int found = 0; | 4733 int found = 0; |
| | 4734 int head_ascii = coding->head_ascii; |
| 4734 | 4735 |
| 4735 detect_info->checked |= CATEGORY_MASK_CHARSET; | 4736 detect_info->checked |= CATEGORY_MASK_CHARSET; |
| 4736 | 4737 |
| 4737 coding = &coding_categories[coding_category_charset]; | 4738 coding = &coding_categories[coding_category_charset]; |
| 4738 attrs = CODING_ID_ATTRS (coding->id); | 4739 attrs = CODING_ID_ATTRS (coding->id); |
| 4739 valids = AREF (attrs, coding_attr_charset_valids); | 4740 valids = AREF (attrs, coding_attr_charset_valids); |
| 4740 | 4741 |
| 4741 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) | 4742 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) |
| 4742 src += coding->head_ascii; | 4743 src += head_ascii; |
| 4743 | 4744 |
| 4744 while (1) | 4745 while (1) |
| 4745 { | 4746 { |
| 4746 int c; | 4747 int c; |
| | 4748 Lisp_Object val; |
| | 4749 struct charset *charset; |
| | 4750 int dim, idx; |
| 4747 | 4751 |
| 4748 src_base = src; | 4752 src_base = src; |
| 4749 ONE_MORE_BYTE (c); | 4753 ONE_MORE_BYTE (c); |
| 4750 if (c < 0) | 4754 if (c < 0) |
| 4751 continue; | 4755 continue; |
| 4752 if (NILP (AREF (valids, c))) | 4756 val = AREF (valids, c); |
| | 4757 if (NILP (val)) |
| 4753 break; | 4758 break; |
| 4754 if (c >= 0x80) | 4759 if (c >= 0x80) |
| 4755 found = CATEGORY_MASK_CHARSET; | 4760 found = CATEGORY_MASK_CHARSET; |
| 4756 } | 4761 if (INTEGERP (val)) |
| | 4762 { |
| | 4763 charset = CHARSET_FROM_ID (XFASTINT (val)); |
| | 4764 dim = CHARSET_DIMENSION (charset); |
| | 4765 for (idx = 1; idx < dim; idx++) |
| | 4766 { |
| | 4767 if (src == src_end) |
| | 4768 goto too_short; |
| | 4769 ONE_MORE_BYTE (c); |
| | 4770 if (c < charset->code_space[(dim - 1 - idx) * 2] |
| | 4771 || c > charset->code_space[(dim - 1 - idx) * 2 + 1]) |
| | 4772 break; |
| | 4773 } |
| | 4774 if (idx < dim) |
| | 4775 break; |
| | 4776 } |
| | 4777 else |
| | 4778 { |
| | 4779 idx = 1; |
| | 4780 for (; CONSP (val); val = XCDR (val)) |
| | 4781 { |
| | 4782 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); |
| | 4783 dim = CHARSET_DIMENSION (charset); |
| | 4784 while (idx < dim) |
| | 4785 { |
| | 4786 if (src == src_end) |
| | 4787 goto too_short; |
| | 4788 ONE_MORE_BYTE (c); |
| | 4789 if (c < charset->code_space[(dim - 1 - idx) * 4] |
| | 4790 || c > charset->code_space[(dim - 1 - idx) * 4 + 1]) |
| | 4791 break; |
| | 4792 idx++; |
| | 4793 } |
| | 4794 if (idx == dim) |
| | 4795 { |
| | 4796 val = Qnil; |
| | 4797 break; |
| | 4798 } |
| | 4799 } |
| | 4800 if (CONSP (val)) |
| | 4801 break; |
| | 4802 } |
| | 4803 } |
| | 4804 too_short: |
| 4757 detect_info->rejected |= CATEGORY_MASK_CHARSET; | 4805 detect_info->rejected |= CATEGORY_MASK_CHARSET; |
| 4758 return 0; | 4806 return 0; |
| 4759 | 4807 |
| 4760 no_more_source: | 4808 no_more_source: |
| 4761 detect_info->found |= found; | 4809 detect_info->found |= found; |
