Mercurial > emacs
comparison src/coding.c @ 88585:c7772f702227
(ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars.
(emacs_mule_char): New arg src. Delete arg `composition'. Caller
changed. Handle 2-byte and 3-byte charsets correctly.
(DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from
DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed.
(DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro.
(DECODE_EMACS_MULE_21_COMPOSITION): Call
DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation
sequence.
(decode_coding_emacs_mule): Handle composition correctly. Rewind
`src' and `consumed_chars' correctly before calling
emacs_mule_char.
(DECODE_COMPOSITION_START): Correctly handle the case of altchar
and alt&rule composition.
(decode_coding_iso_2022): Handle composition correctly.
(init_coding_once): Set up emacs_mule_bytes for private charsets.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Tue, 21 May 2002 04:22:58 +0000 |
| parents | 133bf7ab1bad |
| children | 11186ff7ea0d |
comparison
equal
deleted
inserted
replaced
| 88584:f4f7b1532dc3 | 88585:c7772f702227 |
|---|---|
| 762 { \ | 762 { \ |
| 763 if ((c & 0xFE) != 0xC0) \ | 763 if ((c & 0xFE) != 0xC0) \ |
| 764 error ("Undecodable char found"); \ | 764 error ("Undecodable char found"); \ |
| 765 c = ((c & 1) << 6) | *src++; \ | 765 c = ((c & 1) << 6) | *src++; \ |
| 766 } \ | 766 } \ |
| 767 consumed_chars++; \ | |
| 767 } while (0) | 768 } while (0) |
| 768 | 769 |
| 769 | 770 |
| 770 /* Store a byte C in the place pointed by DST and increment DST to the | 771 /* Store a byte C in the place pointed by DST and increment DST to the |
| 771 next free point, and increment PRODUCED_CHARS. The caller should | 772 next free point, and increment PRODUCED_CHARS. The caller should |
| 1521 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ | 1522 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ |
| 1522 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ | 1523 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ |
| 1523 | 1524 |
| 1524 | 1525 |
| 1525 int | 1526 int |
| 1526 emacs_mule_char (coding, composition, nbytes, nchars) | 1527 emacs_mule_char (coding, src, nbytes, nchars) |
| 1527 struct coding_system *coding; | 1528 struct coding_system *coding; |
| 1529 unsigned char *src; | |
| 1528 int composition; | 1530 int composition; |
| 1529 int *nbytes, *nchars; | 1531 int *nbytes, *nchars; |
| 1530 { | 1532 { |
| 1531 unsigned char *src = coding->source + coding->consumed; | |
| 1532 unsigned char *src_end = coding->source + coding->src_bytes; | 1533 unsigned char *src_end = coding->source + coding->src_bytes; |
| 1533 int multibytep = coding->src_multibyte; | 1534 int multibytep = coding->src_multibyte; |
| 1534 unsigned char *src_base = src; | 1535 unsigned char *src_base = src; |
| 1535 struct charset *charset; | 1536 struct charset *charset; |
| 1536 unsigned code; | 1537 unsigned code; |
| 1537 int c; | 1538 int c; |
| 1538 int consumed_chars = 0; | 1539 int consumed_chars = 0; |
| 1539 | 1540 |
| 1540 ONE_MORE_BYTE (c); | 1541 ONE_MORE_BYTE (c); |
| 1541 if (composition) | |
| 1542 { | |
| 1543 c -= 0x20; | |
| 1544 if (c == 0x80) | |
| 1545 { | |
| 1546 ONE_MORE_BYTE (c); | |
| 1547 if (c < 0xA0) | |
| 1548 goto invalid_code; | |
| 1549 *nbytes = src - src_base; | |
| 1550 *nchars = consumed_chars; | |
| 1551 return (c - 0x80); | |
| 1552 } | |
| 1553 } | |
| 1554 | |
| 1555 switch (emacs_mule_bytes[c]) | 1542 switch (emacs_mule_bytes[c]) |
| 1556 { | 1543 { |
| 1557 case 2: | 1544 case 2: |
| 1558 if (! (charset = emacs_mule_charset[c])) | 1545 if (! (charset = emacs_mule_charset[c])) |
| 1559 goto invalid_code; | 1546 goto invalid_code; |
| 1574 else | 1561 else |
| 1575 { | 1562 { |
| 1576 if (! (charset = emacs_mule_charset[c])) | 1563 if (! (charset = emacs_mule_charset[c])) |
| 1577 goto invalid_code; | 1564 goto invalid_code; |
| 1578 ONE_MORE_BYTE (c); | 1565 ONE_MORE_BYTE (c); |
| 1579 code = (c & 0x7F) << 7; | 1566 code = (c & 0x7F) << 8; |
| 1580 ONE_MORE_BYTE (c); | 1567 ONE_MORE_BYTE (c); |
| 1581 code |= c & 0x7F; | 1568 code |= c & 0x7F; |
| 1582 } | 1569 } |
| 1583 break; | 1570 break; |
| 1584 | 1571 |
| 1585 case 4: | 1572 case 4: |
| 1573 ONE_MORE_BYTE (c); | |
| 1586 if (! (charset = emacs_mule_charset[c])) | 1574 if (! (charset = emacs_mule_charset[c])) |
| 1587 goto invalid_code; | 1575 goto invalid_code; |
| 1588 ONE_MORE_BYTE (c); | 1576 ONE_MORE_BYTE (c); |
| 1589 code = (c & 0x7F) << 7; | 1577 code = (c & 0x7F) << 8; |
| 1590 ONE_MORE_BYTE (c); | 1578 ONE_MORE_BYTE (c); |
| 1591 code |= c & 0x7F; | 1579 code |= c & 0x7F; |
| 1592 break; | 1580 break; |
| 1593 | 1581 |
| 1594 case 1: | 1582 case 1: |
| 1707 int c; \ | 1695 int c; \ |
| 1708 int nbytes, nchars; \ | 1696 int nbytes, nchars; \ |
| 1709 \ | 1697 \ |
| 1710 if (src == src_end) \ | 1698 if (src == src_end) \ |
| 1711 break; \ | 1699 break; \ |
| 1712 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \ | 1700 c = emacs_mule_char (coding, src, &nbytes, &nchars); \ |
| 1713 if (c < 0) \ | 1701 if (c < 0) \ |
| 1714 { \ | 1702 { \ |
| 1715 if (c == -2) \ | 1703 if (c == -2) \ |
| 1716 break; \ | 1704 break; \ |
| 1717 goto invalid_code; \ | 1705 goto invalid_code; \ |
| 1722 } \ | 1710 } \ |
| 1723 else | 1711 else |
| 1724 | 1712 |
| 1725 | 1713 |
| 1726 /* Decode a composition rule represented as a component of composition | 1714 /* Decode a composition rule represented as a component of composition |
| 1727 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC | 1715 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF, |
| 1728 points an invalid byte sequence, set C to -1. */ | 1716 and increment BUF. If SRC points an invalid byte sequence, set C |
| 1729 | 1717 to -1. */ |
| 1730 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \ | 1718 |
| 1719 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \ | |
| 1731 do { \ | 1720 do { \ |
| 1732 int c, gref, nref; \ | 1721 int c, gref, nref; \ |
| 1733 \ | 1722 \ |
| 1734 if (src < src_end) \ | 1723 if (src >= src_end) \ |
| 1735 goto invalid_code; \ | 1724 goto invalid_code; \ |
| 1736 ONE_MORE_BYTE_NO_CHECK (c); \ | 1725 ONE_MORE_BYTE_NO_CHECK (c); \ |
| 1737 c -= 0xA0; \ | 1726 c -= 0x20; \ |
| 1738 if (c < 0 || c >= 81) \ | 1727 if (c < 0 || c >= 81) \ |
| 1739 goto invalid_code; \ | 1728 goto invalid_code; \ |
| 1740 \ | 1729 \ |
| 1741 gref = c / 9, nref = c % 9; \ | 1730 gref = c / 9, nref = c % 9; \ |
| 1731 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ | |
| 1732 } while (0) | |
| 1733 | |
| 1734 | |
| 1735 /* Decode a composition rule represented as a component of composition | |
| 1736 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF, | |
| 1737 and increment BUF. If SRC points an invalid byte sequence, set C | |
| 1738 to -1. */ | |
| 1739 | |
| 1740 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \ | |
| 1741 do { \ | |
| 1742 int gref, nref; \ | |
| 1743 \ | |
| 1744 if (src + 1>= src_end) \ | |
| 1745 goto invalid_code; \ | |
| 1746 ONE_MORE_BYTE_NO_CHECK (gref); \ | |
| 1747 gref -= 0x20; \ | |
| 1748 ONE_MORE_BYTE_NO_CHECK (nref); \ | |
| 1749 nref -= 0x20; \ | |
| 1750 if (gref < 0 || gref >= 81 \ | |
| 1751 || nref < 0 || nref >= 81) \ | |
| 1752 goto invalid_code; \ | |
| 1742 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ | 1753 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ |
| 1743 } while (0) | 1754 } while (0) |
| 1744 | 1755 |
| 1745 | 1756 |
| 1746 #define ADD_COMPOSITION_DATA(buf, method, nchars) \ | 1757 #define ADD_COMPOSITION_DATA(buf, method, nchars) \ |
| 1754 | 1765 |
| 1755 | 1766 |
| 1756 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ | 1767 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ |
| 1757 do { \ | 1768 do { \ |
| 1758 /* Emacs 21 style format. The first three bytes at SRC are \ | 1769 /* Emacs 21 style format. The first three bytes at SRC are \ |
| 1759 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ | 1770 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ |
| 1760 the byte length of this composition information, CHARS is the \ | 1771 the byte length of this composition information, CHARS is the \ |
| 1761 number of characters composed by this composition. */ \ | 1772 number of characters composed by this composition. */ \ |
| 1762 enum composition_method method = c - 0xF0; \ | 1773 enum composition_method method = c - 0xF2; \ |
| 1774 int *charbuf_base = charbuf; \ | |
| 1763 int consumed_chars_limit; \ | 1775 int consumed_chars_limit; \ |
| 1764 int nbytes, nchars; \ | 1776 int nbytes, nchars; \ |
| 1765 \ | 1777 \ |
| 1766 ONE_MORE_BYTE (c); \ | 1778 ONE_MORE_BYTE (c); \ |
| 1767 nbytes = c - 0xA0; \ | 1779 nbytes = c - 0xA0; \ |
| 1775 { \ | 1787 { \ |
| 1776 int i = 0; \ | 1788 int i = 0; \ |
| 1777 while (consumed_chars < consumed_chars_limit) \ | 1789 while (consumed_chars < consumed_chars_limit) \ |
| 1778 { \ | 1790 { \ |
| 1779 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ | 1791 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ |
| 1780 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \ | 1792 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \ |
| 1781 else \ | 1793 else \ |
| 1782 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ | 1794 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ |
| 1795 i++; \ | |
| 1783 } \ | 1796 } \ |
| 1784 if (consumed_chars < consumed_chars_limit) \ | 1797 if (consumed_chars < consumed_chars_limit) \ |
| 1785 goto invalid_code; \ | 1798 goto invalid_code; \ |
| 1799 charbuf_base[0] -= i; \ | |
| 1786 } \ | 1800 } \ |
| 1787 } while (0) | 1801 } while (0) |
| 1788 | 1802 |
| 1789 | 1803 |
| 1790 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ | 1804 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ |
| 1816 int i, j; \ | 1830 int i, j; \ |
| 1817 \ | 1831 \ |
| 1818 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 1832 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ |
| 1819 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ | 1833 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ |
| 1820 { \ | 1834 { \ |
| 1821 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \ | 1835 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ |
| 1822 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 1836 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ |
| 1823 } \ | 1837 } \ |
| 1824 if (i < 1 || (buf - components) % 2 == 0) \ | 1838 if (i < 1 || (buf - components) % 2 == 0) \ |
| 1825 goto invalid_code; \ | 1839 goto invalid_code; \ |
| 1826 if (charbuf + i + (i / 2) + 1 < charbuf_end) \ | 1840 if (charbuf + i + (i / 2) + 1 < charbuf_end) \ |
| 1881 else if (c == 0x80) | 1895 else if (c == 0x80) |
| 1882 { | 1896 { |
| 1883 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end) | 1897 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end) |
| 1884 break; | 1898 break; |
| 1885 ONE_MORE_BYTE (c); | 1899 ONE_MORE_BYTE (c); |
| 1886 if (c - 0xF0 >= COMPOSITION_RELATIVE | 1900 if (c - 0xF2 >= COMPOSITION_RELATIVE |
| 1887 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) | 1901 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) |
| 1888 DECODE_EMACS_MULE_21_COMPOSITION (c); | 1902 DECODE_EMACS_MULE_21_COMPOSITION (c); |
| 1889 else if (c < 0xC0) | 1903 else if (c < 0xC0) |
| 1890 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); | 1904 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); |
| 1891 else if (c == 0xFF) | 1905 else if (c == 0xFF) |
| 1892 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); | 1906 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); |
| 1893 else | 1907 else |
| 1894 goto invalid_code; | 1908 goto invalid_code; |
| 1909 coding->annotated = 1; | |
| 1895 } | 1910 } |
| 1896 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) | 1911 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) |
| 1897 { | 1912 { |
| 1898 int nbytes, nchars; | 1913 int nbytes, nchars; |
| 1899 src--; | 1914 src = src_base; |
| 1900 c = emacs_mule_char (coding, 0, &nbytes, &nchars); | 1915 consumed_chars = consumed_chars_base; |
| 1916 c = emacs_mule_char (coding, src, &nbytes, &nchars); | |
| 1901 if (c < 0) | 1917 if (c < 0) |
| 1902 { | 1918 { |
| 1903 if (c == -2) | 1919 if (c == -2) |
| 1904 break; | 1920 break; |
| 1905 goto invalid_code; | 1921 goto invalid_code; |
| 1906 } | 1922 } |
| 1907 *charbuf++ = c; | 1923 *charbuf++ = c; |
| 1924 src += nbytes; | |
| 1925 consumed_chars += nchars; | |
| 1908 char_offset++; | 1926 char_offset++; |
| 1909 } | 1927 } |
| 1910 continue; | 1928 continue; |
| 1911 | 1929 |
| 1912 invalid_code: | 1930 invalid_code: |
| 2570 */ | 2588 */ |
| 2571 | 2589 |
| 2572 #define DECODE_COMPOSITION_START(c1) \ | 2590 #define DECODE_COMPOSITION_START(c1) \ |
| 2573 do { \ | 2591 do { \ |
| 2574 if (c1 == '0' \ | 2592 if (c1 == '0' \ |
| 2575 && composition_state == COMPOSING_COMPONENT_CHAR) \ | 2593 && composition_state == COMPOSING_COMPONENT_RULE) \ |
| 2576 { \ | 2594 { \ |
| 2577 component_len = component_idx; \ | 2595 component_len = component_idx; \ |
| 2578 composition_state = COMPOSING_CHAR; \ | 2596 composition_state = COMPOSING_CHAR; \ |
| 2579 } \ | 2597 } \ |
| 2580 else \ | 2598 else \ |
| 2723 DECODE_COMPOSITION_RULE (c1); | 2741 DECODE_COMPOSITION_RULE (c1); |
| 2724 components[component_idx++] = c1; | 2742 components[component_idx++] = c1; |
| 2725 composition_state--; | 2743 composition_state--; |
| 2726 continue; | 2744 continue; |
| 2727 } | 2745 } |
| 2728 else if (method == COMPOSITION_WITH_RULE) | |
| 2729 composition_state = COMPOSING_RULE; | |
| 2730 else if (method == COMPOSITION_WITH_RULE_ALTCHARS | |
| 2731 && composition_state == COMPOSING_COMPONENT_CHAR) | |
| 2732 composition_state = COMPOSING_COMPONENT_CHAR; | |
| 2733 } | 2746 } |
| 2734 if (charset_id_0 < 0 | 2747 if (charset_id_0 < 0 |
| 2735 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) | 2748 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) |
| 2749 /* This is SPACE or DEL. */ | |
| 2750 charset = CHARSET_FROM_ID (charset_ascii); | |
| 2751 else | |
| 2752 charset = CHARSET_FROM_ID (charset_id_0); | |
| 2753 break; | |
| 2754 | |
| 2755 case ISO_graphic_plane_0: | |
| 2756 if (composition_state != COMPOSING_NO) | |
| 2736 { | 2757 { |
| 2737 /* This is SPACE or DEL. */ | 2758 if (composition_state == COMPOSING_RULE |
| 2738 charset = CHARSET_FROM_ID (charset_ascii); | 2759 || composition_state == COMPOSING_COMPONENT_RULE) |
| 2739 break; | 2760 { |
| 2740 } | 2761 DECODE_COMPOSITION_RULE (c1); |
| 2741 /* This is a graphic character, we fall down ... */ | 2762 components[component_idx++] = c1; |
| 2742 | 2763 composition_state--; |
| 2743 case ISO_graphic_plane_0: | 2764 continue; |
| 2744 if (composition_state == COMPOSING_RULE) | 2765 } |
| 2745 { | |
| 2746 DECODE_COMPOSITION_RULE (c1); | |
| 2747 components[component_idx++] = c1; | |
| 2748 composition_state = COMPOSING_CHAR; | |
| 2749 } | 2766 } |
| 2750 charset = CHARSET_FROM_ID (charset_id_0); | 2767 charset = CHARSET_FROM_ID (charset_id_0); |
| 2751 break; | 2768 break; |
| 2752 | 2769 |
| 2753 case ISO_0xA0_or_0xFF: | 2770 case ISO_0xA0_or_0xFF: |
| 3007 { | 3024 { |
| 3008 *charbuf++ = c; | 3025 *charbuf++ = c; |
| 3009 char_offset++; | 3026 char_offset++; |
| 3010 } | 3027 } |
| 3011 else | 3028 else |
| 3012 components[component_idx++] = c; | 3029 { |
| 3030 components[component_idx++] = c; | |
| 3031 if (method == COMPOSITION_WITH_RULE | |
| 3032 || (method == COMPOSITION_WITH_RULE_ALTCHARS | |
| 3033 && composition_state == COMPOSING_COMPONENT_CHAR)) | |
| 3034 composition_state++; | |
| 3035 } | |
| 3013 continue; | 3036 continue; |
| 3014 | 3037 |
| 3015 invalid_code: | 3038 invalid_code: |
| 3016 MAYBE_FINISH_COMPOSITION (); | 3039 MAYBE_FINISH_COMPOSITION (); |
| 3017 src = src_base; | 3040 src = src_base; |
| 7783 | 7806 |
| 7784 for (i = 0; i < 256; i++) | 7807 for (i = 0; i < 256; i++) |
| 7785 { | 7808 { |
| 7786 emacs_mule_bytes[i] = 1; | 7809 emacs_mule_bytes[i] = 1; |
| 7787 } | 7810 } |
| 7811 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3; | |
| 7812 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3; | |
| 7813 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4; | |
| 7814 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4; | |
| 7788 } | 7815 } |
| 7789 | 7816 |
| 7790 #ifdef emacs | 7817 #ifdef emacs |
| 7791 | 7818 |
| 7792 void | 7819 void |
