comparison src/coding.c @ 88585:c7772f702227

(ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars. (emacs_mule_char): New arg src. Delete arg `composition'. Caller changed. Handle 2-byte and 3-byte charsets correctly. (DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed. (DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro. (DECODE_EMACS_MULE_21_COMPOSITION): Call DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation sequence. (decode_coding_emacs_mule): Handle composition correctly. Rewind `src' and `consumed_chars' correctly before calling emacs_mule_char. (DECODE_COMPOSITION_START): Correctly handle the case of altchar and alt&rule composition. (decode_coding_iso_2022): Handle composition correctly. (init_coding_once): Set up emacs_mule_bytes for private charsets.
author Kenichi Handa <handa@m17n.org>
date Tue, 21 May 2002 04:22:58 +0000
parents 133bf7ab1bad
children 11186ff7ea0d
comparison
equal deleted inserted replaced
88584:f4f7b1532dc3 88585:c7772f702227
762 { \ 762 { \
763 if ((c & 0xFE) != 0xC0) \ 763 if ((c & 0xFE) != 0xC0) \
764 error ("Undecodable char found"); \ 764 error ("Undecodable char found"); \
765 c = ((c & 1) << 6) | *src++; \ 765 c = ((c & 1) << 6) | *src++; \
766 } \ 766 } \
767 consumed_chars++; \
767 } while (0) 768 } while (0)
768 769
769 770
770 /* Store a byte C in the place pointed by DST and increment DST to the 771 /* Store a byte C in the place pointed by DST and increment DST to the
771 next free point, and increment PRODUCED_CHARS. The caller should 772 next free point, and increment PRODUCED_CHARS. The caller should
1521 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ 1522 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
1522 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ 1523 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
1523 1524
1524 1525
1525 int 1526 int
1526 emacs_mule_char (coding, composition, nbytes, nchars) 1527 emacs_mule_char (coding, src, nbytes, nchars)
1527 struct coding_system *coding; 1528 struct coding_system *coding;
1529 unsigned char *src;
1528 int composition; 1530 int composition;
1529 int *nbytes, *nchars; 1531 int *nbytes, *nchars;
1530 { 1532 {
1531 unsigned char *src = coding->source + coding->consumed;
1532 unsigned char *src_end = coding->source + coding->src_bytes; 1533 unsigned char *src_end = coding->source + coding->src_bytes;
1533 int multibytep = coding->src_multibyte; 1534 int multibytep = coding->src_multibyte;
1534 unsigned char *src_base = src; 1535 unsigned char *src_base = src;
1535 struct charset *charset; 1536 struct charset *charset;
1536 unsigned code; 1537 unsigned code;
1537 int c; 1538 int c;
1538 int consumed_chars = 0; 1539 int consumed_chars = 0;
1539 1540
1540 ONE_MORE_BYTE (c); 1541 ONE_MORE_BYTE (c);
1541 if (composition)
1542 {
1543 c -= 0x20;
1544 if (c == 0x80)
1545 {
1546 ONE_MORE_BYTE (c);
1547 if (c < 0xA0)
1548 goto invalid_code;
1549 *nbytes = src - src_base;
1550 *nchars = consumed_chars;
1551 return (c - 0x80);
1552 }
1553 }
1554
1555 switch (emacs_mule_bytes[c]) 1542 switch (emacs_mule_bytes[c])
1556 { 1543 {
1557 case 2: 1544 case 2:
1558 if (! (charset = emacs_mule_charset[c])) 1545 if (! (charset = emacs_mule_charset[c]))
1559 goto invalid_code; 1546 goto invalid_code;
1574 else 1561 else
1575 { 1562 {
1576 if (! (charset = emacs_mule_charset[c])) 1563 if (! (charset = emacs_mule_charset[c]))
1577 goto invalid_code; 1564 goto invalid_code;
1578 ONE_MORE_BYTE (c); 1565 ONE_MORE_BYTE (c);
1579 code = (c & 0x7F) << 7; 1566 code = (c & 0x7F) << 8;
1580 ONE_MORE_BYTE (c); 1567 ONE_MORE_BYTE (c);
1581 code |= c & 0x7F; 1568 code |= c & 0x7F;
1582 } 1569 }
1583 break; 1570 break;
1584 1571
1585 case 4: 1572 case 4:
1573 ONE_MORE_BYTE (c);
1586 if (! (charset = emacs_mule_charset[c])) 1574 if (! (charset = emacs_mule_charset[c]))
1587 goto invalid_code; 1575 goto invalid_code;
1588 ONE_MORE_BYTE (c); 1576 ONE_MORE_BYTE (c);
1589 code = (c & 0x7F) << 7; 1577 code = (c & 0x7F) << 8;
1590 ONE_MORE_BYTE (c); 1578 ONE_MORE_BYTE (c);
1591 code |= c & 0x7F; 1579 code |= c & 0x7F;
1592 break; 1580 break;
1593 1581
1594 case 1: 1582 case 1:
1707 int c; \ 1695 int c; \
1708 int nbytes, nchars; \ 1696 int nbytes, nchars; \
1709 \ 1697 \
1710 if (src == src_end) \ 1698 if (src == src_end) \
1711 break; \ 1699 break; \
1712 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \ 1700 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
1713 if (c < 0) \ 1701 if (c < 0) \
1714 { \ 1702 { \
1715 if (c == -2) \ 1703 if (c == -2) \
1716 break; \ 1704 break; \
1717 goto invalid_code; \ 1705 goto invalid_code; \
1722 } \ 1710 } \
1723 else 1711 else
1724 1712
1725 1713
1726 /* Decode a composition rule represented as a component of composition 1714 /* Decode a composition rule represented as a component of composition
1727 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC 1715 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1728 points an invalid byte sequence, set C to -1. */ 1716 and increment BUF. If SRC points an invalid byte sequence, set C
1729 1717 to -1. */
1730 #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \ 1718
1719 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1731 do { \ 1720 do { \
1732 int c, gref, nref; \ 1721 int c, gref, nref; \
1733 \ 1722 \
1734 if (src < src_end) \ 1723 if (src >= src_end) \
1735 goto invalid_code; \ 1724 goto invalid_code; \
1736 ONE_MORE_BYTE_NO_CHECK (c); \ 1725 ONE_MORE_BYTE_NO_CHECK (c); \
1737 c -= 0xA0; \ 1726 c -= 0x20; \
1738 if (c < 0 || c >= 81) \ 1727 if (c < 0 || c >= 81) \
1739 goto invalid_code; \ 1728 goto invalid_code; \
1740 \ 1729 \
1741 gref = c / 9, nref = c % 9; \ 1730 gref = c / 9, nref = c % 9; \
1731 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1732 } while (0)
1733
1734
1735 /* Decode a composition rule represented as a component of composition
1736 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1737 and increment BUF. If SRC points to an invalid byte sequence, jump
1738 to the label invalid_code. */
1739
1740 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1741 do { \
1742 int gref, nref; \
1743 \
1744 if (src + 1>= src_end) \
1745 goto invalid_code; \
1746 ONE_MORE_BYTE_NO_CHECK (gref); \
1747 gref -= 0x20; \
1748 ONE_MORE_BYTE_NO_CHECK (nref); \
1749 nref -= 0x20; \
1750 if (gref < 0 || gref >= 81 \
1751 || nref < 0 || nref >= 81) \
1752 goto invalid_code; \
1742 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ 1753 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1743 } while (0) 1754 } while (0)
1744 1755
1745 1756
1746 #define ADD_COMPOSITION_DATA(buf, method, nchars) \ 1757 #define ADD_COMPOSITION_DATA(buf, method, nchars) \
1754 1765
1755 1766
1756 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ 1767 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1757 do { \ 1768 do { \
1758 /* Emacs 21 style format. The first three bytes at SRC are \ 1769 /* Emacs 21 style format. The first three bytes at SRC are \
1759 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ 1770 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1760 the byte length of this composition information, CHARS is the \ 1771 the byte length of this composition information, CHARS is the \
1761 number of characters composed by this composition. */ \ 1772 number of characters composed by this composition. */ \
1762 enum composition_method method = c - 0xF0; \ 1773 enum composition_method method = c - 0xF2; \
1774 int *charbuf_base = charbuf; \
1763 int consumed_chars_limit; \ 1775 int consumed_chars_limit; \
1764 int nbytes, nchars; \ 1776 int nbytes, nchars; \
1765 \ 1777 \
1766 ONE_MORE_BYTE (c); \ 1778 ONE_MORE_BYTE (c); \
1767 nbytes = c - 0xA0; \ 1779 nbytes = c - 0xA0; \
1775 { \ 1787 { \
1776 int i = 0; \ 1788 int i = 0; \
1777 while (consumed_chars < consumed_chars_limit) \ 1789 while (consumed_chars < consumed_chars_limit) \
1778 { \ 1790 { \
1779 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ 1791 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1780 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \ 1792 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1781 else \ 1793 else \
1782 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ 1794 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1795 i++; \
1783 } \ 1796 } \
1784 if (consumed_chars < consumed_chars_limit) \ 1797 if (consumed_chars < consumed_chars_limit) \
1785 goto invalid_code; \ 1798 goto invalid_code; \
1799 charbuf_base[0] -= i; \
1786 } \ 1800 } \
1787 } while (0) 1801 } while (0)
1788 1802
1789 1803
1790 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ 1804 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \
1816 int i, j; \ 1830 int i, j; \
1817 \ 1831 \
1818 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 1832 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1819 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ 1833 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1820 { \ 1834 { \
1821 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \ 1835 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1822 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 1836 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1823 } \ 1837 } \
1824 if (i < 1 || (buf - components) % 2 == 0) \ 1838 if (i < 1 || (buf - components) % 2 == 0) \
1825 goto invalid_code; \ 1839 goto invalid_code; \
1826 if (charbuf + i + (i / 2) + 1 < charbuf_end) \ 1840 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
1881 else if (c == 0x80) 1895 else if (c == 0x80)
1882 { 1896 {
1883 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end) 1897 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1884 break; 1898 break;
1885 ONE_MORE_BYTE (c); 1899 ONE_MORE_BYTE (c);
1886 if (c - 0xF0 >= COMPOSITION_RELATIVE 1900 if (c - 0xF2 >= COMPOSITION_RELATIVE
1887 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) 1901 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1888 DECODE_EMACS_MULE_21_COMPOSITION (c); 1902 DECODE_EMACS_MULE_21_COMPOSITION (c);
1889 else if (c < 0xC0) 1903 else if (c < 0xC0)
1890 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); 1904 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1891 else if (c == 0xFF) 1905 else if (c == 0xFF)
1892 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); 1906 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1893 else 1907 else
1894 goto invalid_code; 1908 goto invalid_code;
1909 coding->annotated = 1;
1895 } 1910 }
1896 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) 1911 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1897 { 1912 {
1898 int nbytes, nchars; 1913 int nbytes, nchars;
1899 src--; 1914 src = src_base;
1900 c = emacs_mule_char (coding, 0, &nbytes, &nchars); 1915 consumed_chars = consumed_chars_base;
1916 c = emacs_mule_char (coding, src, &nbytes, &nchars);
1901 if (c < 0) 1917 if (c < 0)
1902 { 1918 {
1903 if (c == -2) 1919 if (c == -2)
1904 break; 1920 break;
1905 goto invalid_code; 1921 goto invalid_code;
1906 } 1922 }
1907 *charbuf++ = c; 1923 *charbuf++ = c;
1924 src += nbytes;
1925 consumed_chars += nchars;
1908 char_offset++; 1926 char_offset++;
1909 } 1927 }
1910 continue; 1928 continue;
1911 1929
1912 invalid_code: 1930 invalid_code:
2570 */ 2588 */
2571 2589
2572 #define DECODE_COMPOSITION_START(c1) \ 2590 #define DECODE_COMPOSITION_START(c1) \
2573 do { \ 2591 do { \
2574 if (c1 == '0' \ 2592 if (c1 == '0' \
2575 && composition_state == COMPOSING_COMPONENT_CHAR) \ 2593 && composition_state == COMPOSING_COMPONENT_RULE) \
2576 { \ 2594 { \
2577 component_len = component_idx; \ 2595 component_len = component_idx; \
2578 composition_state = COMPOSING_CHAR; \ 2596 composition_state = COMPOSING_CHAR; \
2579 } \ 2597 } \
2580 else \ 2598 else \
2723 DECODE_COMPOSITION_RULE (c1); 2741 DECODE_COMPOSITION_RULE (c1);
2724 components[component_idx++] = c1; 2742 components[component_idx++] = c1;
2725 composition_state--; 2743 composition_state--;
2726 continue; 2744 continue;
2727 } 2745 }
2728 else if (method == COMPOSITION_WITH_RULE)
2729 composition_state = COMPOSING_RULE;
2730 else if (method == COMPOSITION_WITH_RULE_ALTCHARS
2731 && composition_state == COMPOSING_COMPONENT_CHAR)
2732 composition_state = COMPOSING_COMPONENT_CHAR;
2733 } 2746 }
2734 if (charset_id_0 < 0 2747 if (charset_id_0 < 0
2735 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) 2748 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2749 /* This is SPACE or DEL. */
2750 charset = CHARSET_FROM_ID (charset_ascii);
2751 else
2752 charset = CHARSET_FROM_ID (charset_id_0);
2753 break;
2754
2755 case ISO_graphic_plane_0:
2756 if (composition_state != COMPOSING_NO)
2736 { 2757 {
2737 /* This is SPACE or DEL. */ 2758 if (composition_state == COMPOSING_RULE
2738 charset = CHARSET_FROM_ID (charset_ascii); 2759 || composition_state == COMPOSING_COMPONENT_RULE)
2739 break; 2760 {
2740 } 2761 DECODE_COMPOSITION_RULE (c1);
2741 /* This is a graphic character, we fall down ... */ 2762 components[component_idx++] = c1;
2742 2763 composition_state--;
2743 case ISO_graphic_plane_0: 2764 continue;
2744 if (composition_state == COMPOSING_RULE) 2765 }
2745 {
2746 DECODE_COMPOSITION_RULE (c1);
2747 components[component_idx++] = c1;
2748 composition_state = COMPOSING_CHAR;
2749 } 2766 }
2750 charset = CHARSET_FROM_ID (charset_id_0); 2767 charset = CHARSET_FROM_ID (charset_id_0);
2751 break; 2768 break;
2752 2769
2753 case ISO_0xA0_or_0xFF: 2770 case ISO_0xA0_or_0xFF:
3007 { 3024 {
3008 *charbuf++ = c; 3025 *charbuf++ = c;
3009 char_offset++; 3026 char_offset++;
3010 } 3027 }
3011 else 3028 else
3012 components[component_idx++] = c; 3029 {
3030 components[component_idx++] = c;
3031 if (method == COMPOSITION_WITH_RULE
3032 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3033 && composition_state == COMPOSING_COMPONENT_CHAR))
3034 composition_state++;
3035 }
3013 continue; 3036 continue;
3014 3037
3015 invalid_code: 3038 invalid_code:
3016 MAYBE_FINISH_COMPOSITION (); 3039 MAYBE_FINISH_COMPOSITION ();
3017 src = src_base; 3040 src = src_base;
7783 7806
7784 for (i = 0; i < 256; i++) 7807 for (i = 0; i < 256; i++)
7785 { 7808 {
7786 emacs_mule_bytes[i] = 1; 7809 emacs_mule_bytes[i] = 1;
7787 } 7810 }
7811 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7812 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7813 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7814 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
7788 } 7815 }
7789 7816
7790 #ifdef emacs 7817 #ifdef emacs
7791 7818
7792 void 7819 void