Mercurial > emacs
comparison src/coding.c @ 34888:b469d29c0815
(SAFE_ONE_MORE_BYTE): New macro.
(DECODE_EMACS_MULE_COMPOSITION_CHAR): New macro.
(DECODE_EMACS_MULE_COMPOSITION_RULE): New macro.
(decode_composition_emacs_mule): New function.
(decode_coding_emacs_mule): Decode composition sequence by calling
decode_composition_emacs_mule.
(ENCODE_COMPOSITION_EMACS_MULE): New macro.
(encode_coding_emacs_mule): Changed from macro to function. If
a text contains compositions, encode them correctly.
(setup_coding_system): Set coding->common_flags for emacs-mule so
that decoding and encoding are required.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Thu, 28 Dec 2000 01:05:02 +0000 |
| parents | e112f39ea5b6 |
| children | 3868f2e7355a |
comparison
equal
deleted
inserted
replaced
| 34887:cf361d741e2c | 34888:b469d29c0815 |
|---|---|
| 511 (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c))) | 511 (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c))) |
| 512 | 512 |
| 513 | 513 |
| 514 /*** 2. Emacs internal format (emacs-mule) handlers ***/ | 514 /*** 2. Emacs internal format (emacs-mule) handlers ***/ |
| 515 | 515 |
| 516 /* Emacs' internal format for encoding multiple character sets is a | 516 /* Emacs' internal format for representation of multiple character |
| 517 kind of multi-byte encoding, i.e. characters are encoded by | 517 sets is a kind of multi-byte encoding, i.e. characters are |
| 518 variable-length sequences of one-byte codes. | 518 represented by variable-length sequences of one-byte codes. |
| 519 | 519 |
| 520 ASCII characters and control characters (e.g. `tab', `newline') are | 520 ASCII characters and control characters (e.g. `tab', `newline') are |
| 521 represented by one-byte sequences which are their ASCII codes, in | 521 represented by one-byte sequences which are their ASCII codes, in |
| 522 the range 0x00 through 0x7F. | 522 the range 0x00 through 0x7F. |
| 523 | 523 |
| 529 one-byte sequences which are their 8-bit code. | 529 one-byte sequences which are their 8-bit code. |
| 530 | 530 |
| 531 The other characters are represented by a sequence of `base | 531 The other characters are represented by a sequence of `base |
| 532 leading-code', optional `extended leading-code', and one or two | 532 leading-code', optional `extended leading-code', and one or two |
| 533 `position-code's. The length of the sequence is determined by the | 533 `position-code's. The length of the sequence is determined by the |
| 534 base leading-code. Leading-code takes the range 0x80 through 0x9F, | 534 base leading-code. Leading-code takes the range 0x81 through 0x9D, |
| 535 whereas extended leading-code and position-code take the range 0xA0 | 535 whereas extended leading-code and position-code take the range 0xA0 |
| 536 through 0xFF. See `charset.h' for more details about leading-code | 536 through 0xFF. See `charset.h' for more details about leading-code |
| 537 and position-code. | 537 and position-code. |
| 538 | 538 |
| 539 --- CODE RANGE of Emacs' internal format --- | 539 --- CODE RANGE of Emacs' internal format --- |
| 540 character set range | 540 character set range |
| 541 ------------- ----- | 541 ------------- ----- |
| 542 ascii 0x00..0x7F | 542 ascii 0x00..0x7F |
| 543 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF | 543 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF |
| 544 eight-bit-graphic 0xA0..0xBF | 544 eight-bit-graphic 0xA0..0xBF |
| 545 ELSE 0x81..0x9F + [0xA0..0xFF]+ | 545 ELSE 0x81..0x9D + [0xA0..0xFF]+ |
| 546 --------------------------------------------- | 546 --------------------------------------------- |
| 547 | 547 |
| 548 As this is the internal character representation, the format is | |
| 549 usually not used externally (i.e. in a file or in a data sent to a | |
| 550 process). But, it is possible to have a text externally in this | |
| 551 format (i.e. by encoding by the coding system `emacs-mule'). | |
| 552 | |
| 553 In that case, a sequence of one-byte codes has a slightly different | |
| 554 form. | |
| 555 | |
| 556 At first, all characters in eight-bit-control are represented by | |
| 557 one-byte sequences which are their 8-bit code. | |
| 558 | |
| 559 Next, character composition data are represented by the byte | |
| 560 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., | |
| 561 where, | |
| 562 METHOD is 0xF0 plus one of composition method (enum | |
| 563 composition_method), | |
| 564 | |
| 565 BYTES is 0xA0 plus the byte length of this composition data, | |
| 566 | |
| 567 CHARS is 0xA0 plus the number of characters composed by this | |
| 568 data, | |
| 569 | |
| 570 COMPONENTs are characters in multibyte form or composition | |
| 571 rules encoded by two bytes of ASCII codes. | |
| 572 | |
| 573 In addition, for backward compatibility, the following formats are | |
| 574 also recognized as composition data on decoding. | |
| 575 | |
| 576 0x80 MSEQ ... | |
| 577 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ | |
| 578 | |
| 579 Here, | |
| 580 MSEQ is a multibyte form but in this special format: | |
| 581 ASCII: 0xA0 ASCII_CODE+0x80, | |
| 582 other: LEADING_CODE+0x20 FOLLOWING-BYTE ..., | |
| 583 RULE is a one byte code of the range 0xA0..0xF0 that | |
| 584 represents a composition rule. | |
| 548 */ | 585 */ |
| 549 | 586 |
| 550 enum emacs_code_class_type emacs_code_class[256]; | 587 enum emacs_code_class_type emacs_code_class[256]; |
| 551 | 588 |
| 552 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 589 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 605 } | 642 } |
| 606 label_end_of_loop: | 643 label_end_of_loop: |
| 607 return CODING_CATEGORY_MASK_EMACS_MULE; | 644 return CODING_CATEGORY_MASK_EMACS_MULE; |
| 608 } | 645 } |
| 609 | 646 |
| 647 | |
| 648 /* Record the starting position START and METHOD of one composition. */ | |
| 649 | |
| 650 #define CODING_ADD_COMPOSITION_START(coding, start, method) \ | |
| 651 do { \ | |
| 652 struct composition_data *cmp_data = coding->cmp_data; \ | |
| 653 int *data = cmp_data->data + cmp_data->used; \ | |
| 654 coding->cmp_data_start = cmp_data->used; \ | |
| 655 data[0] = -1; \ | |
| 656 data[1] = cmp_data->char_offset + start; \ | |
| 657 data[3] = (int) method; \ | |
| 658 cmp_data->used += 4; \ | |
| 659 } while (0) | |
| 660 | |
| 661 /* Record the ending position END of the current composition. */ | |
| 662 | |
| 663 #define CODING_ADD_COMPOSITION_END(coding, end) \ | |
| 664 do { \ | |
| 665 struct composition_data *cmp_data = coding->cmp_data; \ | |
| 666 int *data = cmp_data->data + coding->cmp_data_start; \ | |
| 667 data[0] = cmp_data->used - coding->cmp_data_start; \ | |
| 668 data[2] = cmp_data->char_offset + end; \ | |
| 669 } while (0) | |
| 670 | |
| 671 /* Record one COMPONENT (alternate character or composition rule). */ | |
| 672 | |
| 673 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ | |
| 674 (coding->cmp_data->data[coding->cmp_data->used++] = component) | |
| 675 | |
| 676 | |
| 677 /* Get one byte from the data pointed to by SRC and increment SRC. If SRC | |
| 678 is not less than SRC_END, return -1 without incrementing SRC. */ | |
| 679 | |
| 680 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++) | |
| 681 | |
| 682 | |
| 683 /* Decode a character represented as a component of composition | |
| 684 sequence of Emacs 20 style at SRC. Set C to that character, store | |
| 685 its multibyte form sequence at P, and set P to the end of that | |
| 686 sequence. If no valid character is found, set C to -1. */ | |
| 687 | |
| 688 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \ | |
| 689 do { \ | |
| 690 int bytes; \ | |
| 691 \ | |
| 692 c = SAFE_ONE_MORE_BYTE (); \ | |
| 693 if (c < 0) \ | |
| 694 break; \ | |
| 695 if (CHAR_HEAD_P (c)) \ | |
| 696 c = -1; \ | |
| 697 else if (c == 0xA0) \ | |
| 698 { \ | |
| 699 c = SAFE_ONE_MORE_BYTE (); \ | |
| 700 if (c < 0xA0) \ | |
| 701 c = -1; \ | |
| 702 else \ | |
| 703 { \ | |
| 704 c -= 0xA0; \ | |
| 705 *p++ = c; \ | |
| 706 } \ | |
| 707 } \ | |
| 708 else if (BASE_LEADING_CODE_P (c - 0x20)) \ | |
| 709 { \ | |
| 710 unsigned char *p0 = p; \ | |
| 711 \ | |
| 712 c -= 0x20; \ | |
| 713 *p++ = c; \ | |
| 714 bytes = BYTES_BY_CHAR_HEAD (c); \ | |
| 715 while (--bytes) \ | |
| 716 { \ | |
| 717 c = SAFE_ONE_MORE_BYTE (); \ | |
| 718 if (c < 0) \ | |
| 719 break; \ | |
| 720 *p++ = c; \ | |
| 721 } \ | |
| 722 if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \ | |
| 723 c = STRING_CHAR (p0, bytes); \ | |
| 724 else \ | |
| 725 c = -1; \ | |
| 726 } \ | |
| 727 else \ | |
| 728 c = -1; \ | |
| 729 } while (0) | |
| 730 | |
| 731 | |
| 732 /* Decode a composition rule represented as a component of composition | |
| 733 sequence of Emacs 20 style at SRC. Set C to the rule. If no | |
| 734 valid rule is found, set C to -1. */ | |
| 735 | |
| 736 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c) \ | |
| 737 do { \ | |
| 738 c = SAFE_ONE_MORE_BYTE (); \ | |
| 739 c -= 0xA0; \ | |
| 740 if (c < 0 || c >= 81) \ | |
| 741 c = -1; \ | |
| 742 else \ | |
| 743 { \ | |
| 744 gref = c / 9, nref = c % 9; \ | |
| 745 c = COMPOSITION_ENCODE_RULE (gref, nref); \ | |
| 746 } \ | |
| 747 } while (0) | |
| 748 | |
| 749 | |
| 750 /* Decode composition sequence encoded by `emacs-mule' at the source | |
| 751 pointed by SRC. SRC_END is the end of source. Store information | |
| 752 of the composition in CODING->cmp_data. | |
| 753 | |
| 754 For backward compatibility, decode also a composition sequence of | |
| 755 Emacs 20 style. In that case, the composition sequence contains | |
| 756 characters that should be extracted into a buffer or string. Store | |
| 757 those characters at *DESTINATION in multibyte form. | |
| 758 | |
| 759 If we encounter an invalid byte sequence, return 0. | |
| 760 If we encounter an insufficient source or destination, or | |
| 761 insufficient space in CODING->cmp_data, return -1. | |
| 762 Otherwise, return consumed bytes in the source. | |
| 763 | |
| 764 */ | |
| 765 static INLINE int | |
| 766 decode_composition_emacs_mule (coding, src, src_end, | |
| 767 destination, dst_end, dst_bytes) | |
| 768 struct coding_system *coding; | |
| 769 unsigned char *src, *src_end, **destination, *dst_end; | |
| 770 int dst_bytes; | |
| 771 { | |
| 772 unsigned char *dst = *destination; | |
| 773 int method, data_len, nchars; | |
| 774 unsigned char *src_base = src++; | |
| 775 /* Store components of composition. */ | |
| 776 int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; | |
| 777 int ncomponent; | |
| 778 /* Store multibyte form of characters to be composed. This is for | |
| 779 Emacs 20 style composition sequence. */ | |
| 780 unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH]; | |
| 781 unsigned char *bufp = buf; | |
| 782 int c, i, gref, nref; | |
| 783 | |
| 784 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH | |
| 785 >= COMPOSITION_DATA_SIZE) | |
| 786 { | |
| 787 coding->result = CODING_FINISH_INSUFFICIENT_CMP; | |
| 788 return -1; | |
| 789 } | |
| 790 | |
| 791 ONE_MORE_BYTE (c); | |
| 792 if (c - 0xF0 >= COMPOSITION_RELATIVE | |
| 793 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) | |
| 794 { | |
| 795 int with_rule; | |
| 796 | |
| 797 method = c - 0xF0; | |
| 798 with_rule = (method == COMPOSITION_WITH_RULE | |
| 799 || method == COMPOSITION_WITH_RULE_ALTCHARS); | |
| 800 ONE_MORE_BYTE (c); | |
| 801 data_len = c - 0xA0; | |
| 802 if (data_len < 4 | |
| 803 || src_base + data_len > src_end) | |
| 804 return 0; | |
| 805 ONE_MORE_BYTE (c); | |
| 806 nchars = c - 0xA0; | |
| 807 if (c < 1) | |
| 808 return 0; | |
| 809 for (ncomponent = 0; src < src_base + data_len; ncomponent++) | |
| 810 { | |
| 811 if (ncomponent % 2 && with_rule) | |
| 812 { | |
| 813 ONE_MORE_BYTE (gref); | |
| 814 gref -= 32; | |
| 815 ONE_MORE_BYTE (nref); | |
| 816 nref -= 32; | |
| 817 c = COMPOSITION_ENCODE_RULE (gref, nref); | |
| 818 } | |
| 819 else | |
| 820 { | |
| 821 int bytes; | |
| 822 if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | |
| 823 c = STRING_CHAR (src, bytes); | |
| 824 else | |
| 825 c = *src, bytes = 1; | |
| 826 src += bytes; | |
| 827 } | |
| 828 component[ncomponent] = c; | |
| 829 } | |
| 830 } | |
| 831 else | |
| 832 { | |
| 833 /* This may be an old Emacs 20 style format. See the comment at | |
| 834 the section 2 of this file. */ | |
| 835 while (src < src_end && !CHAR_HEAD_P (*src)) src++; | |
| 836 if (src == src_end | |
| 837 && !(coding->mode & CODING_MODE_LAST_BLOCK)) | |
| 838 goto label_end_of_loop; | |
| 839 | |
| 840 src_end = src; | |
| 841 src = src_base + 1; | |
| 842 if (c < 0xC0) | |
| 843 { | |
| 844 method = COMPOSITION_RELATIVE; | |
| 845 for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;) | |
| 846 { | |
| 847 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | |
| 848 if (c < 0) | |
| 849 break; | |
| 850 component[ncomponent++] = c; | |
| 851 } | |
| 852 if (ncomponent < 2) | |
| 853 return 0; | |
| 854 nchars = ncomponent; | |
| 855 } | |
| 856 else if (c == 0xFF) | |
| 857 { | |
| 858 method = COMPOSITION_WITH_RULE; | |
| 859 src++; | |
| 860 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | |
| 861 if (c < 0) | |
| 862 return 0; | |
| 863 component[0] = c; | |
| 864 for (ncomponent = 1; | |
| 865 ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;) | |
| 866 { | |
| 867 DECODE_EMACS_MULE_COMPOSITION_RULE (c); | |
| 868 if (c < 0) | |
| 869 break; | |
| 870 component[ncomponent++] = c; | |
| 871 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | |
| 872 if (c < 0) | |
| 873 break; | |
| 874 component[ncomponent++] = c; | |
| 875 } | |
| 876 if (ncomponent < 3) | |
| 877 return 0; | |
| 878 nchars = (ncomponent + 1) / 2; | |
| 879 } | |
| 880 else | |
| 881 return 0; | |
| 882 } | |
| 883 | |
| 884 if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) | |
| 885 { | |
| 886 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method); | |
| 887 for (i = 0; i < ncomponent; i++) | |
| 888 CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]); | |
| 889 CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars); | |
| 890 if (buf < bufp) | |
| 891 { | |
| 892 unsigned char *p = buf; | |
| 893 EMIT_BYTES (p, bufp); | |
| 894 *destination += bufp - buf; | |
| 895 coding->produced_char += nchars; | |
| 896 } | |
| 897 return (src - src_base); | |
| 898 } | |
| 899 label_end_of_loop: | |
| 900 return -1; | |
| 901 } | |
| 610 | 902 |
| 611 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 903 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 612 | 904 |
| 613 static void | 905 static void |
| 614 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | 906 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) |
| 667 } | 959 } |
| 668 *dst++ = *src++; | 960 *dst++ = *src++; |
| 669 coding->produced_char++; | 961 coding->produced_char++; |
| 670 continue; | 962 continue; |
| 671 } | 963 } |
| 672 else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | 964 else if (*src == 0x80) |
| 673 { | 965 { |
| 674 p = src; | 966 /* Start of composition data. */ |
| 675 src += bytes; | 967 int consumed = decode_composition_emacs_mule (coding, src, src_end, |
| 676 } | 968 &dst, dst_end, |
| 677 else | 969 dst_bytes); |
| 678 { | 970 if (consumed < 0) |
| 971 goto label_end_of_loop; | |
| 972 else if (consumed > 0) | |
| 973 { | |
| 974 src += consumed; | |
| 975 continue; | |
| 976 } | |
| 679 bytes = CHAR_STRING (*src, tmp); | 977 bytes = CHAR_STRING (*src, tmp); |
| 680 p = tmp; | 978 p = tmp; |
| 681 src++; | 979 src++; |
| 682 } | 980 } |
| 981 else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | |
| 982 { | |
| 983 p = src; | |
| 984 src += bytes; | |
| 985 } | |
| 986 else | |
| 987 { | |
| 988 bytes = CHAR_STRING (*src, tmp); | |
| 989 p = tmp; | |
| 990 src++; | |
| 991 } | |
| 683 if (dst + bytes >= (dst_bytes ? dst_end : src)) | 992 if (dst + bytes >= (dst_bytes ? dst_end : src)) |
| 684 { | 993 { |
| 685 coding->result = CODING_FINISH_INSUFFICIENT_DST; | 994 coding->result = CODING_FINISH_INSUFFICIENT_DST; |
| 686 break; | 995 break; |
| 687 } | 996 } |
| 691 label_end_of_loop: | 1000 label_end_of_loop: |
| 692 coding->consumed = coding->consumed_char = src_base - source; | 1001 coding->consumed = coding->consumed_char = src_base - source; |
| 693 coding->produced = dst - destination; | 1002 coding->produced = dst - destination; |
| 694 } | 1003 } |
| 695 | 1004 |
| 696 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \ | 1005 |
| 697 encode_eol (coding, source, destination, src_bytes, dst_bytes) | 1006 /* Encode composition data stored at DATA into a special byte sequence |
| 698 | 1007 starting by 0x80. Update CODING->cmp_data_start and maybe |
| 1008 CODING->cmp_data for the next call. */ | |
| 1009 | |
| 1010 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data) \ | |
| 1011 do { \ | |
| 1012 unsigned char buf[1024], *p0 = buf, *p; \ | |
| 1013 int len = data[0]; \ | |
| 1014 int i; \ | |
| 1015 \ | |
| 1016 buf[0] = 0x80; \ | |
| 1017 buf[1] = 0xF0 + data[3]; /* METHOD */ \ | |
| 1018 buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */ \ | |
| 1019 p = buf + 4; \ | |
| 1020 if (data[3] == COMPOSITION_WITH_RULE \ | |
| 1021 || data[3] == COMPOSITION_WITH_RULE_ALTCHARS) \ | |
| 1022 { \ | |
| 1023 p += CHAR_STRING (data[4], p); \ | |
| 1024 for (i = 5; i < len; i += 2) \ | |
| 1025 { \ | |
| 1026 int gref, nref; \ | |
| 1027 COMPOSITION_DECODE_RULE (data[i], gref, nref); \ | |
| 1028 *p++ = 0x20 + gref; \ | |
| 1029 *p++ = 0x20 + nref; \ | |
| 1030 p += CHAR_STRING (data[i + 1], p); \ | |
| 1031 } \ | |
| 1032 } \ | |
| 1033 else \ | |
| 1034 { \ | |
| 1035 for (i = 4; i < len; i++) \ | |
| 1036 p += CHAR_STRING (data[i], p); \ | |
| 1037 } \ | |
| 1038 buf[2] = 0xA0 + (p - buf); /* COMPONENTS-BYTES */ \ | |
| 1039 \ | |
| 1040 if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src)) \ | |
| 1041 { \ | |
| 1042 coding->result = CODING_FINISH_INSUFFICIENT_DST; \ | |
| 1043 goto label_end_of_loop; \ | |
| 1044 } \ | |
| 1045 while (p0 < p) \ | |
| 1046 *dst++ = *p0++; \ | |
| 1047 coding->cmp_data_start += data[0]; \ | |
| 1048 if (coding->cmp_data_start == coding->cmp_data->used \ | |
| 1049 && coding->cmp_data->next) \ | |
| 1050 { \ | |
| 1051 coding->cmp_data = coding->cmp_data->next; \ | |
| 1052 coding->cmp_data_start = 0; \ | |
| 1053 } \ | |
| 1054 } while (0) | |
| 1055 | |
| 1056 | |
| 1057 static void encode_eol P_ ((struct coding_system *, unsigned char *, | |
| 1058 unsigned char *, int, int)); | |
| 1059 | |
| 1060 static void | |
| 1061 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |
| 1062 struct coding_system *coding; | |
| 1063 unsigned char *source, *destination; | |
| 1064 int src_bytes, dst_bytes; | |
| 1065 { | |
| 1066 unsigned char *src = source; | |
| 1067 unsigned char *src_end = source + src_bytes; | |
| 1068 unsigned char *dst = destination; | |
| 1069 unsigned char *dst_end = destination + dst_bytes; | |
| 1070 unsigned char *src_base; | |
| 1071 int c; | |
| 1072 int char_offset; | |
| 1073 int *data; | |
| 1074 | |
| 1075 Lisp_Object translation_table; | |
| 1076 | |
| 1077 translation_table = Qnil; | |
| 1078 | |
| 1079 /* Optimization for the case that there's no composition. */ | |
| 1080 if (!coding->cmp_data || coding->cmp_data->used == 0) | |
| 1081 { | |
| 1082 encode_eol (coding, source, destination, src_bytes, dst_bytes); | |
| 1083 return; | |
| 1084 } | |
| 1085 | |
| 1086 char_offset = coding->cmp_data->char_offset; | |
| 1087 data = coding->cmp_data->data + coding->cmp_data_start; | |
| 1088 while (1) | |
| 1089 { | |
| 1090 src_base = src; | |
| 1091 | |
| 1092 /* If SRC starts a composition, encode the information about the | |
| 1093 composition in advance. */ | |
| 1094 if (coding->cmp_data_start < coding->cmp_data->used | |
| 1095 && char_offset + coding->consumed_char == data[1]) | |
| 1096 { | |
| 1097 ENCODE_COMPOSITION_EMACS_MULE (coding, data); | |
| 1098 char_offset = coding->cmp_data->char_offset; | |
| 1099 data = coding->cmp_data->data + coding->cmp_data_start; | |
| 1100 } | |
| 1101 | |
| 1102 ONE_MORE_CHAR (c); | |
| 1103 if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF | |
| 1104 || coding->eol_type == CODING_EOL_CR)) | |
| 1105 { | |
| 1106 if (coding->eol_type == CODING_EOL_CRLF) | |
| 1107 EMIT_TWO_BYTES ('\r', c); | |
| 1108 else | |
| 1109 EMIT_ONE_BYTE ('\r'); | |
| 1110 } | |
| 1111 else if (SINGLE_BYTE_CHAR_P (c)) | |
| 1112 EMIT_ONE_BYTE (c); | |
| 1113 else | |
| 1114 EMIT_BYTES (src_base, src); | |
| 1115 coding->consumed_char++; | |
| 1116 } | |
| 1117 label_end_of_loop: | |
| 1118 coding->consumed = src_base - source; | |
| 1119 coding->produced = coding->produced_char = dst - destination; | |
| 1120 return; | |
| 1121 } | |
| 699 | 1122 |
| 700 | 1123 |
| 701 /*** 3. ISO2022 handlers ***/ | 1124 /*** 3. ISO2022 handlers ***/ |
| 702 | 1125 |
| 703 /* The following note describes the coding system ISO2022 briefly. | 1126 /* The following note describes the coding system ISO2022 briefly. |
| 1178 coding->cmp_data->next = cmp_data; | 1601 coding->cmp_data->next = cmp_data; |
| 1179 coding->cmp_data = cmp_data; | 1602 coding->cmp_data = cmp_data; |
| 1180 coding->cmp_data_start = 0; | 1603 coding->cmp_data_start = 0; |
| 1181 } | 1604 } |
| 1182 | 1605 |
| 1183 /* Record the starting position START and METHOD of one composition. */ | 1606 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. |
| 1184 | 1607 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 |
| 1185 #define CODING_ADD_COMPOSITION_START(coding, start, method) \ | 1608 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 |
| 1186 do { \ | 1609 ESC 3 : altchar composition : ESC 3 ALT ... ESC 0 CHAR ... ESC 1 |
| 1187 struct composition_data *cmp_data = coding->cmp_data; \ | 1610 ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1 |
| 1188 int *data = cmp_data->data + cmp_data->used; \ | 1611 */ |
| 1189 coding->cmp_data_start = cmp_data->used; \ | |
| 1190 data[0] = -1; \ | |
| 1191 data[1] = cmp_data->char_offset + start; \ | |
| 1192 data[3] = (int) method; \ | |
| 1193 cmp_data->used += 4; \ | |
| 1194 } while (0) | |
| 1195 | |
| 1196 /* Record the ending position END of the current composition. */ | |
| 1197 | |
| 1198 #define CODING_ADD_COMPOSITION_END(coding, end) \ | |
| 1199 do { \ | |
| 1200 struct composition_data *cmp_data = coding->cmp_data; \ | |
| 1201 int *data = cmp_data->data + coding->cmp_data_start; \ | |
| 1202 data[0] = cmp_data->used - coding->cmp_data_start; \ | |
| 1203 data[2] = cmp_data->char_offset + end; \ | |
| 1204 } while (0) | |
| 1205 | |
| 1206 /* Record one COMPONENT (alternate character or composition rule). */ | |
| 1207 | |
| 1208 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ | |
| 1209 (coding->cmp_data->data[coding->cmp_data->used++] = component) | |
| 1210 | |
| 1211 /* Handle compositoin start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */ | |
| 1212 | 1612 |
| 1213 #define DECODE_COMPOSITION_START(c1) \ | 1613 #define DECODE_COMPOSITION_START(c1) \ |
| 1214 do { \ | 1614 do { \ |
| 1215 if (coding->composing == COMPOSITION_DISABLED) \ | 1615 if (coding->composing == COMPOSITION_DISABLED) \ |
| 1216 { \ | 1616 { \ |
| 3086 | 3486 |
| 3087 switch (XFASTINT (coding_type)) | 3487 switch (XFASTINT (coding_type)) |
| 3088 { | 3488 { |
| 3089 case 0: | 3489 case 0: |
| 3090 coding->type = coding_type_emacs_mule; | 3490 coding->type = coding_type_emacs_mule; |
| 3491 coding->common_flags | |
| 3492 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; | |
| 3493 coding->composing = COMPOSITION_NO; | |
| 3091 if (!NILP (coding->post_read_conversion)) | 3494 if (!NILP (coding->post_read_conversion)) |
| 3092 coding->common_flags |= CODING_REQUIRE_DECODING_MASK; | 3495 coding->common_flags |= CODING_REQUIRE_DECODING_MASK; |
| 3093 if (!NILP (coding->pre_write_conversion)) | 3496 if (!NILP (coding->pre_write_conversion)) |
| 3094 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK; | 3497 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK; |
| 3095 break; | 3498 break; |
