comparison src/coding.c @ 26847:2f17ea330dae

Include composite.h. (DECODE_CHARACTER_ASCII): Don't handle composition here. (DECODE_CHARACTER_DIMENSION1): Likewise. Don't check the validity of multibyte code here. (DECODE_CHARACTER_DIMENSION2): Likewise. (detect_coding_emacs_mule): Change the case label from EMACS_leading_code_composition to 0x80. (detect_coding_iso2022): Handle new composition sequence. (DECODE_ISO_CHARACTER): Likewise. (check_composing_code): Deleted. (coding_allocate_composition_data): New function. (CODING_ADD_COMPOSITION_START) (CODING_ADD_COMPOSITION_END) (CODING_ADD_COMPOSITION_COMPONENT) (DECODE_COMPOSITION_START) (DECODE_COMPOSITION_END) (DECODE_COMPOSITION_RULE): New macros. (decode_coding_iso2022): Handle new composition sequence. (ENCODE_ISO_CHARACTER): Don't check composition here. (ENCODE_COMPOSITION_RULE) (ENCODE_COMPOSITION_START): New macros. (ENCODE_COMPOSITION_NO_RULE_START) (ENCODE_COMPOSITION_WITH_RULE_START): Deleted. (ENCODE_COMPOSITION_END): Handle new composition sequence. (ENCODE_COMPOSITION_FAKE_START): New macro. (encode_coding_iso2022): Handle new composition sequence. (ENCODE_SJIS_BIG5_CHARACTER): Delete superfluous `;' at the tail. (encode_coding_sjis_big5): Ignore composition. (setup_coding_system): Initialize new members of struct coding_system. Enable composition only when the coding system has `composition' property t. (coding_free_composition_data) (coding_adjust_composition_offset) (coding_save_composition) (coding_restore_composition): New functions. (code_convert_region): Call coding_save_composition for encoding and coding_allocate_composition_data for decoding. Don't skip ASCII characters if we handle composition on encoding. Call signal_after_change with Check_BORDER. (code_convert_string): Call coding_save_composition for encoding and coding_allocate_composition_data for decoding. Don't skip ASCII characters if we handle composition on encoding. (code_convert_string1): Set Vlast_coding_system_used after calling code_convert_string. 
(code_convert_string_norecord): Disable composition. (Fset_terminal_coding_system_internal): Likewise. (Fset_safe_terminal_coding_system_internal): Likewise. (Fset_keyboard_coding_system_internal): Likewise. (init_coding_once): Set emacs_code_class[0x80] to EMACS_invalid_code.
author Kenichi Handa <handa@m17n.org>
date Wed, 15 Dec 1999 00:06:45 +0000
parents 936b39bd05b4
children a8f4cebb614d
comparison
equal deleted inserted replaced
26846:1a0f5960e65e 26847:2f17ea330dae
211 point to an appropriate area and the variable `coding' to point to 211 point to an appropriate area and the variable `coding' to point to
212 the coding-system of the currently decoding text in advance. */ 212 the coding-system of the currently decoding text in advance. */
213 213
214 /* Decode one ASCII character C. */ 214 /* Decode one ASCII character C. */
215 215
216 #define DECODE_CHARACTER_ASCII(c) \ 216 #define DECODE_CHARACTER_ASCII(c) \
217 do { \ 217 do { \
218 if (COMPOSING_P (coding->composing)) \ 218 *dst++ = (c) & 0x7F; \
219 { \ 219 coding->produced_char++; \
220 *dst++ = 0xA0, *dst++ = (c) | 0x80; \
221 coding->composed_chars++; \
222 if (((c) | 0x80) < 0xA0) \
223 coding->fake_multibyte = 1; \
224 } \
225 else \
226 { \
227 /* If ASCII charset is invoked to GR, \
228 we must reset MSB now. */ \
229 *dst++ = (c) & 0x7F; \
230 coding->produced_char++; \
231 } \
232 } while (0) 220 } while (0)
233 221
234 /* Decode one DIMENSION1 character whose charset is CHARSET and whose 222 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
235 position-code is C. */ 223 position-code is C. */
236 224
237 #define DECODE_CHARACTER_DIMENSION1(charset, c) \ 225 #define DECODE_CHARACTER_DIMENSION1(charset, c) \
238 do { \ 226 do { \
239 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ 227 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
240 if (COMPOSING_P (coding->composing)) \ 228 \
241 { \ 229 *dst++ = leading_code; \
242 *dst++ = leading_code + 0x20; \ 230 if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) > 0) \
243 coding->composed_chars++; \
244 } \
245 else \
246 { \
247 *dst++ = leading_code; \
248 coding->produced_char++; \
249 } \
250 if (leading_code = CHARSET_LEADING_CODE_EXT (charset)) \
251 *dst++ = leading_code; \ 231 *dst++ = leading_code; \
252 *dst++ = (c) | 0x80; \ 232 *dst++ = (c) | 0x80; \
253 if (((c) | 0x80) < 0xA0) \ 233 coding->produced_char++; \
254 coding->fake_multibyte = 1; \
255 } while (0) 234 } while (0)
256 235
257 /* Decode one DIMENSION2 character whose charset is CHARSET and whose 236 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
258 position-codes are C1 and C2. */ 237 position-codes are C1 and C2. */
259 238
260 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2) \ 239 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2) \
261 do { \ 240 do { \
262 DECODE_CHARACTER_DIMENSION1 (charset, c1); \ 241 DECODE_CHARACTER_DIMENSION1 (charset, c1); \
263 *dst++ = (c2) | 0x80; \ 242 *dst++ = (c2) | 0x80; \
264 if (((c2) | 0x80) < 0xA0) \
265 coding->fake_multibyte = 1; \
266 } while (0) 243 } while (0)
267 244
268 245
269 /*** 1. Preamble ***/ 246 /*** 1. Preamble ***/
270 247
277 #ifdef emacs 254 #ifdef emacs
278 255
279 #include "lisp.h" 256 #include "lisp.h"
280 #include "buffer.h" 257 #include "buffer.h"
281 #include "charset.h" 258 #include "charset.h"
259 #include "composite.h"
282 #include "ccl.h" 260 #include "ccl.h"
283 #include "coding.h" 261 #include "coding.h"
284 #include "window.h" 262 #include "window.h"
285 263
286 #else /* not emacs */ 264 #else /* not emacs */
436 by the base leading-code. Leading-code takes the range 0x80 414 by the base leading-code. Leading-code takes the range 0x80
437 through 0x9F, whereas extended leading-code and position-code take 415 through 0x9F, whereas extended leading-code and position-code take
438 the range 0xA0 through 0xFF. See `charset.h' for more details 416 the range 0xA0 through 0xFF. See `charset.h' for more details
439 about leading-code and position-code. 417 about leading-code and position-code.
440 418
441 There's one exception to this rule. Special leading-code
442 `leading-code-composition' denotes that the following several
443 characters should be composed into one character. Leading-codes of
444 components (except for ASCII) are added 0x20. An ASCII character
445 component is represented by a 2-byte sequence of `0xA0' and
446 `ASCII-code + 0x80'. See also the comments in `charset.h' for the
447 details of composite character. Hence, we can summarize the code
448 range as follows:
449
450 --- CODE RANGE of Emacs' internal format --- 419 --- CODE RANGE of Emacs' internal format ---
451 (character set) (range) 420 (character set) (range)
452 ASCII 0x00 .. 0x7F 421 ASCII 0x00 .. 0x7F
453 ELSE (1st byte) 0x80 .. 0x9F 422 ELSE (1st byte) 0x81 .. 0x9F
454 (rest bytes) 0xA0 .. 0xFF 423 (rest bytes) 0xA0 .. 0xFF
455 --------------------------------------------- 424 ---------------------------------------------
456 425
457 */ 426 */
458 427
503 break; 472 break;
504 473
505 case EMACS_invalid_code: 474 case EMACS_invalid_code:
506 return 0; 475 return 0;
507 476
508 case EMACS_leading_code_composition: /* c == 0x80 */ 477 case EMACS_leading_code_4:
478 CHECK_CODE_RANGE_A0_FF;
479 /* fall down to check it two more times ... */
480
481 case EMACS_leading_code_3:
482 CHECK_CODE_RANGE_A0_FF;
483 /* fall down to check it one more time ... */
484
485 case EMACS_leading_code_2:
486 CHECK_CODE_RANGE_A0_FF;
487 break;
488
489 case 0x80: /* Old leading code for a composite character. */
509 if (composing) 490 if (composing)
510 CHECK_CODE_RANGE_A0_FF; 491 CHECK_CODE_RANGE_A0_FF;
511 else 492 else
512 composing = 1; 493 composing = 1;
513 break;
514
515 case EMACS_leading_code_4:
516 CHECK_CODE_RANGE_A0_FF;
517 /* fall down to check it two more times ... */
518
519 case EMACS_leading_code_3:
520 CHECK_CODE_RANGE_A0_FF;
521 /* fall down to check it one more time ... */
522
523 case EMACS_leading_code_2:
524 CHECK_CODE_RANGE_A0_FF;
525 break; 494 break;
526 495
527 default: 496 default:
528 label_end_of_switch: 497 label_end_of_switch:
529 break; 498 break;
681 o CSI '2' ']' -- start of right-to-left text 650 o CSI '2' ']' -- start of right-to-left text
682 The control character CSI (0x9B: control sequence introducer) is 651 The control character CSI (0x9B: control sequence introducer) is
683 abbreviated to the escape sequence ESC '[' in a 7-bit environment. 652 abbreviated to the escape sequence ESC '[' in a 7-bit environment.
684 653
685 Character composition specification takes the following form: 654 Character composition specification takes the following form:
686 o ESC '0' -- start character composition 655 o ESC '0' -- start relative composition
687 o ESC '1' -- end character composition 656 o ESC '1' -- end composition
657 o ESC '2' -- start rule-base composition (*)
658 o ESC '3' -- start relative composition with alternate chars (**)
659 o ESC '4' -- start rule-base composition with alternate chars (**)
688 Since these are not standard escape sequences of any ISO standard, 660 Since these are not standard escape sequences of any ISO standard,
689 the use of them for these meaning is restricted to Emacs only. */ 661 the use of them for these meaning is restricted to Emacs only.
662
663 (*) This form is used only in Emacs 20.5 and the older versions,
664 but the newer versions can safely decode it.
665 (**) This form is used only in Emacs 21.1 and the newer versions,
666 and the older versions can't decode it.
667
668 Here's a list of example usages of these composition escape
669 sequences (categorized by `enum composition_method').
670
671 COMPOSITION_RELATIVE:
672 ESC 0 CHAR [ CHAR ] ESC 1
673 COMPOSITION_WITH_RULE:
674 ESC 2 CHAR [ RULE CHAR ] ESC 1
675 COMPOSITION_WITH_ALTCHARS:
676 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
677 COMPOSITION_WITH_RULE_ALTCHARS:
678 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
690 679
691 enum iso_code_class_type iso_code_class[256]; 680 enum iso_code_class_type iso_code_class[256];
692 681
693 #define CHARSET_OK(idx, charset) \ 682 #define CHARSET_OK(idx, charset) \
694 (coding_system_table[idx] \ 683 (coding_system_table[idx] \
772 { 761 {
773 /* ESC <Fe> for SS2 or SS3. */ 762 /* ESC <Fe> for SS2 or SS3. */
774 mask &= CODING_CATEGORY_MASK_ISO_7_ELSE; 763 mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
775 break; 764 break;
776 } 765 }
777 else if (c == '0' || c == '1' || c == '2') 766 else if (c >= '0' && c <= '4')
778 /* ESC <Fp> for start/end composition. Just ignore. */ 767 {
779 break; 768 /* ESC <Fp> for start/end composition. */
769 mask_found |= CODING_CATEGORY_MASK_ISO;
770 break;
771 }
780 else 772 else
781 /* Invalid escape sequence. Just ignore. */ 773 /* Invalid escape sequence. Just ignore. */
782 break; 774 break;
783 775
784 /* We found a valid designation sequence for CHARSET. */ 776 /* We found a valid designation sequence for CHARSET. */
912 904
913 /* Decode a character of which charset is CHARSET and the 1st position 905 /* Decode a character of which charset is CHARSET and the 1st position
914 code is C1. If dimension of CHARSET is 2, the 2nd position code is 906 code is C1. If dimension of CHARSET is 2, the 2nd position code is
915 fetched from SRC and set to C2. If CHARSET is negative, it means 907 fetched from SRC and set to C2. If CHARSET is negative, it means
916 that we are decoding ill formed text, and what we can do is just to 908 that we are decoding ill formed text, and what we can do is just to
917 read C1 as is. */ 909 read C1 as is.
918 910
919 #define DECODE_ISO_CHARACTER(charset, c1) \ 911 If we are now in the middle of composition sequence, the decoded
920 do { \ 912 character may be ALTCHAR (see the comment above). In that case,
921 int c_alt, charset_alt = (charset); \ 913 the character goes to coding->cmp_data->data instead of DST. */
922 if (COMPOSING_HEAD_P (coding->composing)) \ 914
923 { \ 915 #define DECODE_ISO_CHARACTER(charset, c1) \
924 *dst++ = LEADING_CODE_COMPOSITION; \ 916 do { \
925 if (COMPOSING_WITH_RULE_P (coding->composing)) \ 917 int c_alt = -1, charset_alt = (charset); \
926 /* To tell composition rules are embeded. */ \ 918 if (charset_alt >= 0) \
927 *dst++ = 0xFF; \ 919 { \
928 coding->composing += 2; \ 920 if (CHARSET_DIMENSION (charset_alt) == 2) \
929 } \ 921 { \
930 if (charset_alt >= 0) \ 922 ONE_MORE_BYTE (c2); \
931 { \ 923 if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \
932 if (CHARSET_DIMENSION (charset_alt) == 2) \ 924 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \
933 { \ 925 { \
934 ONE_MORE_BYTE (c2); \ 926 src--; \
935 if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \ 927 charset_alt = CHARSET_ASCII; \
936 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \ 928 } \
937 { \ 929 } \
938 src--; \ 930 if (!NILP (translation_table) \
939 charset_alt = CHARSET_ASCII; \ 931 && ((c_alt = translate_char (translation_table, \
940 } \
941 } \
942 if (!NILP (translation_table) \
943 && ((c_alt = translate_char (translation_table, \
944 -1, charset_alt, c1, c2)) >= 0)) \ 932 -1, charset_alt, c1, c2)) >= 0)) \
945 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ 933 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
946 } \ 934 } \
947 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \ 935 if (! COMPOSING_P (coding) \
948 DECODE_CHARACTER_ASCII (c1); \ 936 || coding->composing == COMPOSITION_RELATIVE \
949 else if (CHARSET_DIMENSION (charset_alt) == 1) \ 937 || coding->composing == COMPOSITION_WITH_RULE) \
950 DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \ 938 { \
951 else \ 939 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
952 DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ 940 DECODE_CHARACTER_ASCII (c1); \
953 if (COMPOSING_WITH_RULE_P (coding->composing)) \ 941 else if (CHARSET_DIMENSION (charset_alt) == 1) \
954 /* To tell a composition rule follows. */ \ 942 DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
955 coding->composing = COMPOSING_WITH_RULE_RULE; \ 943 else \
944 DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
945 } \
946 if (COMPOSING_P (coding) \
947 && coding->composing != COMPOSITION_RELATIVE) \
948 { \
949 if (c_alt < 0) \
950 c_alt = MAKE_CHAR (charset_alt, c1, c2); \
951 CODING_ADD_COMPOSITION_COMPONENT (coding, c_alt); \
952 coding->composition_rule_follows \
953 = coding->composing != COMPOSITION_WITH_ALTCHARS; \
954 } \
956 } while (0) 955 } while (0)
957 956
958 /* Set designation state into CODING. */ 957 /* Set designation state into CODING. */
959 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ 958 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
960 do { \ 959 do { \
989 coding->spec.iso2022.last_invalid_designation_register = reg; \ 988 coding->spec.iso2022.last_invalid_designation_register = reg; \
990 goto label_invalid_code; \ 989 goto label_invalid_code; \
991 } \ 990 } \
992 } while (0) 991 } while (0)
993 992
994 /* Return 0 if there's a valid composing sequence starting at SRC and 993 /* Allocate a memory block for storing information about compositions.
995 ending before SRC_END, else return -1. */ 994 The block is chained to the already allocated blocks. */
996 995
997 int 996 static void
998 check_composing_code (coding, src, src_end) 997 coding_allocate_composition_data (coding, char_offset)
999 struct coding_system *coding; 998 struct coding_system *coding;
1000 unsigned char *src, *src_end; 999 int char_offset;
1001 { 1000 {
1002 int charset, c, c1, dim; 1001 struct composition_data *cmp_data
1003 1002 = (struct composition_data *) xmalloc (sizeof *cmp_data);
1004 while (src < src_end) 1003
1005 { 1004 cmp_data->char_offset = char_offset;
1006 c = *src++; 1005 cmp_data->used = 0;
1007 if (c >= 0x20) 1006 cmp_data->prev = coding->cmp_data;
1008 continue; 1007 cmp_data->next = NULL;
1009 if (c != ISO_CODE_ESC || src >= src_end) 1008 if (coding->cmp_data)
1010 return -1; 1009 coding->cmp_data->next = cmp_data;
1011 c = *src++; 1010 coding->cmp_data = cmp_data;
1012 if (c == '1') /* end of compsition */ 1011 coding->cmp_data_start = 0;
1013 return 0; 1012 }
1014 if (src + 2 >= src_end 1013
1015 || !coding->flags & CODING_FLAG_ISO_DESIGNATION) 1014 /* Record the starting position START and METHOD of one composition. */
1016 return -1; 1015
1017 1016 #define CODING_ADD_COMPOSITION_START(coding, start, method) \
1018 dim = (c == '$'); 1017 do { \
1019 if (dim == 1) 1018 struct composition_data *cmp_data = coding->cmp_data; \
1020 c = (*src >= '@' && *src <= 'B') ? '(' : *src++; 1019 int *data = cmp_data->data + cmp_data->used; \
1021 if (c >= '(' && c <= '/') 1020 coding->cmp_data_start = cmp_data->used; \
1022 { 1021 data[0] = -1; \
1023 c1 = *src++; 1022 data[1] = cmp_data->char_offset + start; \
1024 if ((c1 < ' ' || c1 >= 0x80) 1023 data[3] = (int) method; \
1025 || (charset = iso_charset_table[dim][c >= ','][c1]) < 0 1024 cmp_data->used += 4; \
1026 || ! coding->safe_charsets[charset] 1025 } while (0)
1027 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) 1026
1028 == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) 1027 /* Record the ending position END of the current composition. */
1029 return -1; 1028
1030 } 1029 #define CODING_ADD_COMPOSITION_END(coding, end) \
1031 else 1030 do { \
1032 return -1; 1031 struct composition_data *cmp_data = coding->cmp_data; \
1033 } 1032 int *data = cmp_data->data + coding->cmp_data_start; \
1034 1033 data[0] = cmp_data->used - coding->cmp_data_start; \
1035 /* We have not found the sequence "ESC 1". */ 1034 data[2] = cmp_data->char_offset + end; \
1036 return -1; 1035 } while (0)
1037 } 1036
1037 /* Record one COMPONENT (alternate character or composition rule). */
1038
1039 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
1040 (coding->cmp_data->data[coding->cmp_data->used++] = component)
1041
1042 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */
1043
1044 #define DECODE_COMPOSITION_START(c1) \
1045 do { \
1046 if (coding->composing == COMPOSITION_DISABLED) \
1047 { \
1048 *dst++ = ISO_CODE_ESC; \
1049 *dst++ = c1 & 0x7f; \
1050 coding->produced_char += 2; \
1051 } \
1052 else if (!COMPOSING_P (coding)) \
1053 { \
1054 /* This is surely the start of a composition. We must be sure \
1055 that coding->cmp_data has enough space to store the \
1056 information about the composition. If not, terminate the \
1057 current decoding loop, allocate one more memory block for \
1058 coding->cmp_data in the caller, then start the decoding \
1059 loop again. We can't allocate memory here directly because \
1060 it may cause buffer/string relocation. */ \
1061 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1062 >= COMPOSITION_DATA_SIZE) \
1063 { \
1064 result = CODING_FINISH_INSUFFICIENT_CMP; \
1065 goto label_end_of_loop_2; \
1066 } \
1067 coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
1068 : c1 == '2' ? COMPOSITION_WITH_RULE \
1069 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
1070 : COMPOSITION_WITH_RULE_ALTCHARS); \
1071 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
1072 coding->composing); \
1073 coding->composition_rule_follows = 0; \
1074 } \
1075 else \
1076 { \
1077 /* We are already handling a composition. If the method is \
1078 the following two, the codes following the current escape \
1079 sequence are actual characters stored in a buffer. */ \
1080 if (coding->composing == COMPOSITION_WITH_ALTCHARS \
1081 || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
1082 { \
1083 coding->composing = COMPOSITION_RELATIVE; \
1084 coding->composition_rule_follows = 0; \
1085 } \
1086 } \
1087 } while (0)
1088
1089 /* Handle composition end sequence ESC 1. */
1090
1091 #define DECODE_COMPOSITION_END(c1) \
1092 do { \
1093 if (coding->composing == COMPOSITION_DISABLED) \
1094 { \
1095 *dst++ = ISO_CODE_ESC; \
1096 *dst++ = c1; \
1097 coding->produced_char += 2; \
1098 } \
1099 else \
1100 { \
1101 CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
1102 coding->composing = COMPOSITION_NO; \
1103 } \
1104 } while (0)
1105
1106 /* Decode a composition rule from the byte C1 (and maybe one more byte
1107 from SRC) and store one encoded composition rule in
1108 coding->cmp_data. */
1109
1110 #define DECODE_COMPOSITION_RULE(c1) \
1111 do { \
1112 int rule = 0; \
1113 (c1) -= 32; \
1114 if (c1 < 81) /* old format (before ver.21) */ \
1115 { \
1116 int gref = (c1) / 9; \
1117 int nref = (c1) % 9; \
1118 if (gref == 4) gref = 10; \
1119 if (nref == 4) nref = 10; \
1120 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
1121 } \
1122 else if (c1 < 93) /* new format (after ver.21 */ \
1123 { \
1124 ONE_MORE_BYTE (c2); \
1125 rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
1126 } \
1127 CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \
1128 coding->composition_rule_follows = 0; \
1129 } while (0)
1130
1038 1131
1039 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 1132 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1040 1133
1041 int 1134 int
1042 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) 1135 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1075 ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset 1168 ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset
1076 to SRC_BASE before exiting. */ 1169 to SRC_BASE before exiting. */
1077 unsigned char *src_base = src; 1170 unsigned char *src_base = src;
1078 int c1 = *src++, c2; 1171 int c1 = *src++, c2;
1079 1172
1173 /* We produce no character or one character. */
1080 switch (iso_code_class [c1]) 1174 switch (iso_code_class [c1])
1081 { 1175 {
1082 case ISO_0x20_or_0x7F: 1176 case ISO_0x20_or_0x7F:
1083 if (!coding->composing 1177 if (COMPOSING_P (coding) && coding->composition_rule_follows)
1084 && (charset0 < 0 || CHARSET_CHARS (charset0) == 94)) 1178 {
1179 DECODE_COMPOSITION_RULE (c1);
1180 break;
1181 }
1182 if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1085 { 1183 {
1086 /* This is SPACE or DEL. */ 1184 /* This is SPACE or DEL. */
1087 *dst++ = c1; 1185 *dst++ = c1;
1088 coding->produced_char++; 1186 coding->produced_char++;
1089 break; 1187 break;
1090 } 1188 }
1091 /* This is a graphic character, we fall down ... */ 1189 /* This is a graphic character, we fall down ... */
1092 1190
1093 case ISO_graphic_plane_0: 1191 case ISO_graphic_plane_0:
1094 if (coding->composing == COMPOSING_WITH_RULE_RULE) 1192 if (COMPOSING_P (coding) && coding->composition_rule_follows)
1095 { 1193 DECODE_COMPOSITION_RULE (c1);
1096 /* This is a composition rule. */
1097 *dst++ = c1 | 0x80;
1098 coding->composing = COMPOSING_WITH_RULE_TAIL;
1099 }
1100 else 1194 else
1101 DECODE_ISO_CHARACTER (charset0, c1); 1195 DECODE_ISO_CHARACTER (charset0, c1);
1102 break; 1196 break;
1103 1197
1104 case ISO_0xA0_or_0xFF: 1198 case ISO_0xA0_or_0xFF:
1108 /* This is a graphic character, we fall down ... */ 1202 /* This is a graphic character, we fall down ... */
1109 1203
1110 case ISO_graphic_plane_1: 1204 case ISO_graphic_plane_1:
1111 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) 1205 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1112 goto label_invalid_code; 1206 goto label_invalid_code;
1113 else 1207 DECODE_ISO_CHARACTER (charset1, c1);
1114 DECODE_ISO_CHARACTER (charset1, c1);
1115 break; 1208 break;
1116 1209
1117 case ISO_control_code: 1210 case ISO_control_code:
1211 if (COMPOSING_P (coding))
1212 DECODE_COMPOSITION_END ('1');
1213
1118 /* All ISO2022 control characters in this class have the 1214 /* All ISO2022 control characters in this class have the
1119 same representation in Emacs internal format. */ 1215 same representation in Emacs internal format. */
1120 if (c1 == '\n' 1216 if (c1 == '\n'
1121 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) 1217 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1122 && (coding->eol_type == CODING_EOL_CR 1218 && (coding->eol_type == CODING_EOL_CR
1125 result = CODING_FINISH_INCONSISTENT_EOL; 1221 result = CODING_FINISH_INCONSISTENT_EOL;
1126 goto label_end_of_loop_2; 1222 goto label_end_of_loop_2;
1127 } 1223 }
1128 *dst++ = c1; 1224 *dst++ = c1;
1129 coding->produced_char++; 1225 coding->produced_char++;
1130 if (c1 >= 0x80)
1131 coding->fake_multibyte = 1;
1132 break; 1226 break;
1133 1227
1134 case ISO_carriage_return: 1228 case ISO_carriage_return:
1229 if (COMPOSING_P (coding))
1230 DECODE_COMPOSITION_END ('1');
1231
1135 if (coding->eol_type == CODING_EOL_CR) 1232 if (coding->eol_type == CODING_EOL_CR)
1136 *dst++ = '\n'; 1233 *dst++ = '\n';
1137 else if (coding->eol_type == CODING_EOL_CRLF) 1234 else if (coding->eol_type == CODING_EOL_CRLF)
1138 { 1235 {
1139 ONE_MORE_BYTE (c1); 1236 ONE_MORE_BYTE (c1);
1263 ONE_MORE_BYTE (c1); 1360 ONE_MORE_BYTE (c1);
1264 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); 1361 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1265 DECODE_ISO_CHARACTER (charset, c1); 1362 DECODE_ISO_CHARACTER (charset, c1);
1266 break; 1363 break;
1267 1364
1268 case '0': case '2': /* start composing */ 1365 case '0': case '2': case '3': case '4': /* start composition */
1269 /* Before processing composing, we must be sure that all 1366 DECODE_COMPOSITION_START (c1);
1270 characters being composed are supported by CODING.
1271 If not, we must give up composing. */
1272 if (check_composing_code (coding, src, src_end) == 0)
1273 {
1274 /* We are looking at a valid composition sequence. */
1275 coding->composing = (c1 == '0'
1276 ? COMPOSING_NO_RULE_HEAD
1277 : COMPOSING_WITH_RULE_HEAD);
1278 coding->composed_chars = 0;
1279 }
1280 else
1281 {
1282 *dst++ = ISO_CODE_ESC;
1283 *dst++ = c1;
1284 coding->produced_char += 2;
1285 }
1286 break; 1367 break;
1287 1368
1288 case '1': /* end composing */ 1369 case '1': /* end composition */
1289 if (!coding->composing) 1370 DECODE_COMPOSITION_END (c1);
1290 {
1291 *dst++ = ISO_CODE_ESC;
1292 *dst++ = c1;
1293 coding->produced_char += 2;
1294 break;
1295 }
1296
1297 if (coding->composed_chars > 0)
1298 {
1299 if (coding->composed_chars == 1)
1300 {
1301 unsigned char *this_char_start = dst;
1302 int this_bytes;
1303
1304 /* Only one character is in the composing
1305 sequence. Make it a normal character. */
1306 while (*--this_char_start != LEADING_CODE_COMPOSITION);
1307 dst = (this_char_start
1308 + (coding->composing == COMPOSING_NO_RULE_TAIL
1309 ? 1 : 2));
1310 *dst -= 0x20;
1311 if (*dst == 0x80)
1312 *++dst &= 0x7F;
1313 this_bytes = BYTES_BY_CHAR_HEAD (*dst);
1314 while (this_bytes--) *this_char_start++ = *dst++;
1315 dst = this_char_start;
1316 }
1317 coding->produced_char++;
1318 }
1319 coding->composing = COMPOSING_NO;
1320 break; 1371 break;
1321 1372
1322 case '[': /* specification of direction */ 1373 case '[': /* specification of direction */
1323 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) 1374 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1324 goto label_invalid_code; 1375 goto label_invalid_code;
1375 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1426 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1376 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); 1427 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1377 break; 1428 break;
1378 1429
1379 label_invalid_code: 1430 label_invalid_code:
1431 if (COMPOSING_P (coding))
1432 DECODE_COMPOSITION_END ('1');
1433 coding->produced_char += src - src_base;
1380 while (src_base < src) 1434 while (src_base < src)
1381 *dst++ = *src_base++; 1435 *dst++ = (*src_base++) & 0x7F;
1382 coding->fake_multibyte = 1;
1383 } 1436 }
1384 continue; 1437 continue;
1385 1438
1386 label_end_of_loop: 1439 label_end_of_loop:
1387 result = CODING_FINISH_INSUFFICIENT_SRC; 1440 result = CODING_FINISH_INSUFFICIENT_SRC;
1398 && coding->mode & CODING_MODE_LAST_BLOCK) 1451 && coding->mode & CODING_MODE_LAST_BLOCK)
1399 { 1452 {
1400 /* This is the last block of the text to be decoded. We had 1453 /* This is the last block of the text to be decoded. We had
1401 better just flush out all remaining codes in the text 1454 better just flush out all remaining codes in the text
1402 although they are not valid characters. */ 1455 although they are not valid characters. */
1456 if (COMPOSING_P (coding))
1457 DECODE_COMPOSITION_END ('1');
1403 src_bytes = src_end - src; 1458 src_bytes = src_end - src;
1404 if (dst_bytes && (dst_end - dst < src_bytes)) 1459 if (dst_bytes && (dst_end - dst < src_end - src))
1405 src_bytes = dst_end - dst; 1460 src_end = src + (dst_end - dst);
1406 bcopy (src, dst, src_bytes); 1461 coding->produced_char += src_end - src;
1407 dst += src_bytes; 1462 while (src < src_end)
1408 src += src_bytes; 1463 *dst++ = (*src++) & 0x7F;
1409 coding->fake_multibyte = 1;
1410 } 1464 }
1411 } 1465 }
1412 1466
1413 coding->consumed = coding->consumed_char = src - source; 1467 coding->consumed = coding->consumed_char = src - source;
1414 coding->produced = dst - destination; 1468 coding->produced = dst - destination;
1622 } while (1) 1676 } while (1)
1623 1677
1624 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ 1678 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \
1625 do { \ 1679 do { \
1626 int c_alt, charset_alt; \ 1680 int c_alt, charset_alt; \
1681 \
1627 if (!NILP (translation_table) \ 1682 if (!NILP (translation_table) \
1628 && ((c_alt = translate_char (translation_table, -1, \ 1683 && ((c_alt = translate_char (translation_table, -1, \
1629 charset, c1, c2)) \ 1684 charset, c1, c2)) \
1630 >= 0)) \ 1685 >= 0)) \
1631 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ 1686 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
1663 *dst++ = c1; \ 1718 *dst++ = c1; \
1664 if (c2) \ 1719 if (c2) \
1665 *dst++ = c2; \ 1720 *dst++ = c2; \
1666 } \ 1721 } \
1667 } \ 1722 } \
1668 if (! COMPOSING_P (coding->composing)) \ 1723 coding->consumed_char++; \
1669 coding->consumed_char++; \
1670 } while (0) 1724 } while (0)
1671 1725
1672 /* Produce designation and invocation codes at a place pointed by DST 1726 /* Produce designation and invocation codes at a place pointed by DST
1673 to use CHARSET. The element `spec.iso2022' of *CODING is updated. 1727 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1674 Return new DST. */ 1728 Return new DST. */
1730 } 1784 }
1731 } 1785 }
1732 return dst; 1786 return dst;
1733 } 1787 }
1734 1788
1735 /* The following two macros produce codes for indicating composition. */ 1789 /* Produce 2-byte codes for encoded composition rule RULE. */
1736 #define ENCODE_COMPOSITION_NO_RULE_START *dst++ = ISO_CODE_ESC, *dst++ = '0' 1790
1737 #define ENCODE_COMPOSITION_WITH_RULE_START *dst++ = ISO_CODE_ESC, *dst++ = '2' 1791 #define ENCODE_COMPOSITION_RULE(rule) \
1738 #define ENCODE_COMPOSITION_END *dst++ = ISO_CODE_ESC, *dst++ = '1' 1792 do { \
1793 int gref, nref; \
1794 COMPOSITION_DECODE_RULE (rule, gref, nref); \
1795 *dst++ = 32 + 81 + gref; \
1796 *dst++ = 32 + nref; \
1797 } while (0)
1798
1799 /* Produce codes for indicating the start of a composition sequence
1800 (ESC 0, ESC 3, or ESC 4). DATA points to an array of integers
1801 which specify information about the composition. See the comment
1802 in coding.h for the format of DATA. */
1803
1804 #define ENCODE_COMPOSITION_START(coding, data) \
1805 do { \
1806 coding->composing = data[3]; \
1807 *dst++ = ISO_CODE_ESC; \
1808 if (coding->composing == COMPOSITION_RELATIVE) \
1809 *dst++ = '0'; \
1810 else \
1811 { \
1812 *dst++ = (coding->composing == COMPOSITION_WITH_ALTCHARS \
1813 ? '3' : '4'); \
1814 coding->cmp_data_index = coding->cmp_data_start + 4; \
1815 coding->composition_rule_follows = 0; \
1816 } \
1817 } while (0)
1818
1819 /* Produce codes for indicating the end of the current composition. */
1820
1821 #define ENCODE_COMPOSITION_END(coding, data) \
1822 do { \
1823 *dst++ = ISO_CODE_ESC; \
1824 *dst++ = '1'; \
1825 coding->cmp_data_start += data[0]; \
1826 coding->composing = COMPOSITION_NO; \
1827 if (coding->cmp_data_start == coding->cmp_data->used \
1828 && coding->cmp_data->next) \
1829 { \
1830 coding->cmp_data = coding->cmp_data->next; \
1831 coding->cmp_data_start = 0; \
1832 } \
1833 } while (0)
1834
1835 /* Produce composition start sequence ESC 0. Here, this sequence
1836 doesn't mean the start of a new composition but means that we have
1837 just produced components (alternate chars and composition rules) of
1838 the composition and the actual text follows in SRC. */
1839
1840 #define ENCODE_COMPOSITION_FAKE_START(coding) \
1841 do { \
1842 *dst++ = ISO_CODE_ESC; \
1843 *dst++ = '0'; \
1844 coding->composing = COMPOSITION_RELATIVE; \
1845 } while (0)
1739 1846
1740 /* The following three macros produce codes for indicating direction 1847 /* The following three macros produce codes for indicating direction
1741 of text. */ 1848 of text. */
1742 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \ 1849 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
1743 do { \ 1850 do { \
1834 { 1941 {
1835 unsigned char *src = source; 1942 unsigned char *src = source;
1836 unsigned char *src_end = source + src_bytes; 1943 unsigned char *src_end = source + src_bytes;
1837 unsigned char *dst = destination; 1944 unsigned char *dst = destination;
1838 unsigned char *dst_end = destination + dst_bytes; 1945 unsigned char *dst_end = destination + dst_bytes;
1839 /* Since the maximum bytes produced by each loop is 20, we subtract 19 1946 /* Since the maximum bytes produced by each loop is 14, we subtract 13
1840 from DST_END to assure overflow checking is necessary only at the 1947 from DST_END to assure overflow checking is necessary only at the
1841 head of loop. */ 1948 head of loop. */
1842 unsigned char *adjusted_dst_end = dst_end - 19; 1949 unsigned char *adjusted_dst_end = dst_end - 13;
1843 Lisp_Object translation_table 1950 Lisp_Object translation_table
1844 = coding->translation_table_for_encode; 1951 = coding->translation_table_for_encode;
1845 int result = CODING_FINISH_NORMAL; 1952 int result = CODING_FINISH_NORMAL;
1846 1953
1847 if (!NILP (Venable_character_translation) && NILP (translation_table)) 1954 if (!NILP (Venable_character_translation) && NILP (translation_table))
1849 1956
1850 coding->consumed_char = 0; 1957 coding->consumed_char = 0;
1851 coding->fake_multibyte = 0; 1958 coding->fake_multibyte = 0;
1852 while (src < src_end && (dst_bytes 1959 while (src < src_end && (dst_bytes
1853 ? (dst < adjusted_dst_end) 1960 ? (dst < adjusted_dst_end)
1854 : (dst < src - 19))) 1961 : (dst < src - 13)))
1855 { 1962 {
1856 /* SRC_BASE remembers the start position in source in each loop. 1963 /* SRC_BASE remembers the start position in source in each loop.
1857 The loop will be exited when there's not enough source text 1964 The loop will be exited when there's not enough source text
1858 to analyze multi-byte codes (within macros ONE_MORE_BYTE, 1965 to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1859 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is 1966 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
1868 encode_designation_at_bol (coding, translation_table, 1975 encode_designation_at_bol (coding, translation_table,
1869 src, src_end, &dst); 1976 src, src_end, &dst);
1870 CODING_SPEC_ISO_BOL (coding) = 0; 1977 CODING_SPEC_ISO_BOL (coding) = 0;
1871 } 1978 }
1872 1979
1873 c1 = *src++; 1980 /* Check composition start and end. */
1874 /* If we are seeing a component of a composite character, we are 1981 if (coding->composing != COMPOSITION_DISABLED
1875 seeing a leading-code encoded irregularly for composition, or 1982 && coding->cmp_data_start < coding->cmp_data->used)
1876 a composition rule if composing with rule. We must set C1 to 1983 {
1877 a normal leading-code or an ASCII code. If we are not seeing 1984 struct composition_data *cmp_data = coding->cmp_data;
1878 a composite character, we must reset composition, 1985 int *data = cmp_data->data + coding->cmp_data_start;
1879 designation, and invocation states. */ 1986 int this_pos = cmp_data->char_offset + coding->consumed_char;
1880 if (COMPOSING_P (coding->composing)) 1987
1881 { 1988 if (coding->composing == COMPOSITION_RELATIVE)
1882 if (c1 < 0xA0)
1883 { 1989 {
1884 /* We are not in a composite character any longer. */ 1990 if (this_pos == data[2])
1885 coding->composing = COMPOSING_NO; 1991 {
1886 ENCODE_RESET_PLANE_AND_REGISTER; 1992 ENCODE_COMPOSITION_END (coding, data);
1887 ENCODE_COMPOSITION_END; 1993 cmp_data = coding->cmp_data;
1994 data = cmp_data->data + coding->cmp_data_start;
1995 }
1888 } 1996 }
1889 else 1997 else if (COMPOSING_P (coding))
1890 { 1998 {
1891 if (coding->composing == COMPOSING_WITH_RULE_RULE) 1999 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS */
2000 if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2001 /* We have consumed components of the composition.
2002 What follows in SRC is the composition's base
2003 text. */
2004 ENCODE_COMPOSITION_FAKE_START (coding);
2005 else
1892 { 2006 {
1893 *dst++ = c1 & 0x7F; 2007 int c = cmp_data->data[coding->cmp_data_index++];
1894 coding->composing = COMPOSING_WITH_RULE_HEAD; 2008 if (coding->composition_rule_follows)
2009 {
2010 ENCODE_COMPOSITION_RULE (c);
2011 coding->composition_rule_follows = 0;
2012 }
2013 else
2014 {
2015 SPLIT_CHAR (c, charset, c1, c2);
2016 ENCODE_ISO_CHARACTER (charset, c1, c2);
2017 /* But, we didn't consume a character in SRC. */
2018 coding->consumed_char--;
2019 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2020 coding->composition_rule_follows = 1;
2021 }
1895 continue; 2022 continue;
1896 } 2023 }
1897 else if (coding->composing == COMPOSING_WITH_RULE_HEAD) 2024 }
1898 coding->composing = COMPOSING_WITH_RULE_RULE; 2025 if (!COMPOSING_P (coding))
1899 if (c1 == 0xA0) 2026 {
2027 if (this_pos == data[1])
1900 { 2028 {
1901 /* This is an ASCII component. */ 2029 ENCODE_COMPOSITION_START (coding, data);
1902 ONE_MORE_BYTE (c1); 2030 continue;
1903 c1 &= 0x7F;
1904 } 2031 }
1905 else
1906 /* This is a leading-code of non ASCII component. */
1907 c1 -= 0x20;
1908 } 2032 }
1909 } 2033 }
1910 2034
2035 c1 = *src++;
1911 /* Now encode one character. C1 is a control character, an 2036 /* Now encode one character. C1 is a control character, an
1912 ASCII character, or a leading-code of multi-byte character. */ 2037 ASCII character, or a leading-code of multi-byte character. */
1913 switch (emacs_code_class[c1]) 2038 switch (emacs_code_class[c1])
1914 { 2039 {
1915 case EMACS_ascii_code: 2040 case EMACS_ascii_code:
1994 } 2119 }
1995 else 2120 else
1996 ENCODE_ISO_CHARACTER (c2, c3, c4); 2121 ENCODE_ISO_CHARACTER (c2, c3, c4);
1997 break; 2122 break;
1998 2123
1999 case EMACS_leading_code_composition:
2000 ONE_MORE_BYTE (c2);
2001 if (c2 < 0xA0)
2002 {
2003 /* invalid sequence */
2004 *dst++ = c1;
2005 src--;
2006 coding->consumed_char++;
2007 }
2008 else if (c2 == 0xFF)
2009 {
2010 ENCODE_RESET_PLANE_AND_REGISTER;
2011 coding->composing = COMPOSING_WITH_RULE_HEAD;
2012 ENCODE_COMPOSITION_WITH_RULE_START;
2013 coding->consumed_char++;
2014 }
2015 else
2016 {
2017 ENCODE_RESET_PLANE_AND_REGISTER;
2018 /* Rewind one byte because it is a character code of
2019 composition elements. */
2020 src--;
2021 coding->composing = COMPOSING_NO_RULE_HEAD;
2022 ENCODE_COMPOSITION_NO_RULE_START;
2023 coding->consumed_char++;
2024 }
2025 break;
2026
2027 case EMACS_invalid_code: 2124 case EMACS_invalid_code:
2028 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) 2125 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2029 ENCODE_RESET_PLANE_AND_REGISTER; 2126 ENCODE_RESET_PLANE_AND_REGISTER;
2030 *dst++ = c1; 2127 *dst++ = c1;
2031 coding->consumed_char++; 2128 coding->consumed_char++;
2045 reset graphic planes and registers to the initial state, and 2142 reset graphic planes and registers to the initial state, and
2046 flush out the carryover if any. */ 2143 flush out the carryover if any. */
2047 if (coding->mode & CODING_MODE_LAST_BLOCK) 2144 if (coding->mode & CODING_MODE_LAST_BLOCK)
2048 { 2145 {
2049 ENCODE_RESET_PLANE_AND_REGISTER; 2146 ENCODE_RESET_PLANE_AND_REGISTER;
2050 if (COMPOSING_P (coding->composing)) 2147 if (COMPOSING_P (coding))
2051 ENCODE_COMPOSITION_END; 2148 *dst++ = ISO_CODE_ESC, *dst++ = '1';
2052 if (result == CODING_FINISH_INSUFFICIENT_SRC) 2149 if (result == CODING_FINISH_INSUFFICIENT_SRC)
2053 { 2150 {
2054 while (src < src_end && dst < dst_end) 2151 while (src < src_end && dst < dst_end)
2055 *dst++ = *src++; 2152 *dst++ = *src++;
2056 } 2153 }
2200 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ 2297 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
2201 coding->fake_multibyte = 1; \ 2298 coding->fake_multibyte = 1; \
2202 } \ 2299 } \
2203 } \ 2300 } \
2204 coding->consumed_char++; \ 2301 coding->consumed_char++; \
2205 } while (0); 2302 } while (0)
2206 2303
2207 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2304 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2208 Check if a text is encoded in SJIS. If it is, return 2305 Check if a text is encoded in SJIS. If it is, return
2209 CODING_CATEGORY_MASK_SJIS, else return 0. */ 2306 CODING_CATEGORY_MASK_SJIS, else return 0. */
2210 2307
2462 TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE 2559 TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE
2463 before exiting. */ 2560 before exiting. */
2464 unsigned char *src_base = src; 2561 unsigned char *src_base = src;
2465 unsigned char c1 = *src++, c2, c3, c4; 2562 unsigned char c1 = *src++, c2, c3, c4;
2466 2563
2467 if (coding->composing)
2468 {
2469 if (c1 == 0xA0)
2470 {
2471 ONE_MORE_BYTE (c1);
2472 c1 &= 0x7F;
2473 }
2474 else if (c1 >= 0xA0)
2475 c1 -= 0x20;
2476 else
2477 coding->composing = 0;
2478 }
2479
2480 switch (emacs_code_class[c1]) 2564 switch (emacs_code_class[c1])
2481 { 2565 {
2482 case EMACS_ascii_code: 2566 case EMACS_ascii_code:
2483 ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); 2567 ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2484 break; 2568 break;
2519 break; 2603 break;
2520 2604
2521 case EMACS_leading_code_4: 2605 case EMACS_leading_code_4:
2522 THREE_MORE_BYTES (c2, c3, c4); 2606 THREE_MORE_BYTES (c2, c3, c4);
2523 ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4); 2607 ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2524 break;
2525
2526 case EMACS_leading_code_composition:
2527 coding->composing = 1;
2528 break; 2608 break;
2529 2609
2530 default: /* i.e. case EMACS_invalid_code: */ 2610 default: /* i.e. case EMACS_invalid_code: */
2531 *dst++ = c1; 2611 *dst++ = c1;
2532 coding->consumed_char++; 2612 coding->consumed_char++;
2896 coding->symbol = coding_system; 2976 coding->symbol = coding_system;
2897 coding->common_flags = 0; 2977 coding->common_flags = 0;
2898 coding->mode = 0; 2978 coding->mode = 0;
2899 coding->heading_ascii = -1; 2979 coding->heading_ascii = -1;
2900 coding->post_read_conversion = coding->pre_write_conversion = Qnil; 2980 coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2981 coding->composing = COMPOSITION_DISABLED;
2982 coding->cmp_data = NULL;
2901 2983
2902 if (NILP (coding_system)) 2984 if (NILP (coding_system))
2903 goto label_invalid_coding_system; 2985 goto label_invalid_coding_system;
2904 2986
2905 coding_spec = Fget (coding_system, Qcoding_system); 2987 coding_spec = Fget (coding_system, Qcoding_system);
2941 } 3023 }
2942 else 3024 else
2943 coding->type = coding_type_no_conversion; 3025 coding->type = coding_type_no_conversion;
2944 return 0; 3026 return 0;
2945 } 3027 }
2946
2947 /* Initialize remaining fields. */
2948 coding->composing = 0;
2949 coding->composed_chars = 0;
2950 3028
2951 /* Get values of coding system properties: 3029 /* Get values of coding system properties:
2952 `post-read-conversion', `pre-write-conversion', 3030 `post-read-conversion', `pre-write-conversion',
2953 `translation-table-for-decode', `translation-table-for-encode'. */ 3031 `translation-table-for-decode', `translation-table-for-encode'. */
2954 plist = XVECTOR (coding_spec)->contents[3]; 3032 plist = XVECTOR (coding_spec)->contents[3];
2994 if ((i = get_charset_id (XCAR (val))) >= 0) 3072 if ((i = get_charset_id (XCAR (val))) >= 0)
2995 coding->safe_charsets[i] = 1; 3073 coding->safe_charsets[i] = 1;
2996 val = XCDR (val); 3074 val = XCDR (val);
2997 } 3075 }
2998 } 3076 }
3077
3078 /* If the coding system has non-nil `composition' property, enable
3079 composition handling. */
3080 val = Fplist_get (plist, Qcomposition);
3081 if (!NILP (val))
3082 coding->composing = COMPOSITION_NO;
2999 3083
3000 switch (XFASTINT (coding_type)) 3084 switch (XFASTINT (coding_type))
3001 { 3085 {
3002 case 0: 3086 case 0:
3003 coding->type = coding_type_emacs_mule; 3087 coding->type = coding_type_emacs_mule;
3233 coding->category_idx = CODING_CATEGORY_IDX_BINARY; 3317 coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3234 coding->common_flags = 0; 3318 coding->common_flags = 0;
3235 coding->eol_type = CODING_EOL_LF; 3319 coding->eol_type = CODING_EOL_LF;
3236 coding->pre_write_conversion = coding->post_read_conversion = Qnil; 3320 coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3237 return -1; 3321 return -1;
3322 }
3323
3324 /* Free memory blocks allocated for storing composition information. */
3325
3326 void
3327 coding_free_composition_data (coding)
3328 struct coding_system *coding;
3329 {
3330 struct composition_data *cmp_data = coding->cmp_data, *next;
3331
3332 if (!cmp_data)
3333 return;
3334 /* Memory blocks are chained. At first, rewind to the first, then,
3335 free blocks one by one. */
3336 while (cmp_data->prev)
3337 cmp_data = cmp_data->prev;
3338 while (cmp_data)
3339 {
3340 next = cmp_data->next;
3341 xfree (cmp_data);
3342 cmp_data = next;
3343 }
3344 coding->cmp_data = NULL;
3345 }
3346
3347 /* Set `char_offset' member of all memory blocks pointed by
3348 coding->cmp_data to POS. */
3349
3350 void
3351 coding_adjust_composition_offset (coding, pos)
3352 struct coding_system *coding;
3353 int pos;
3354 {
3355 struct composition_data *cmp_data;
3356
3357 for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3358 cmp_data->char_offset = pos;
3238 } 3359 }
3239 3360
3240 /* Setup raw-text or one of its subsidiaries in the structure 3361 /* Setup raw-text or one of its subsidiaries in the structure
3241 coding_system CODING according to the already setup value eol_type 3362 coding_system CODING according to the already setup value eol_type
3242 in CODING. CODING should be setup for some coding system in 3363 in CODING. CODING should be setup for some coding system in
4244 { 4365 {
4245 inhibit_pre_post_conversion = 0; 4366 inhibit_pre_post_conversion = 0;
4246 return Qnil; 4367 return Qnil;
4247 } 4368 }
4248 4369
4370 /* Store information about all compositions in the range FROM and TO
4371 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
4372 buffer or a string, defaults to the current buffer. */
4373
4374 void
4375 coding_save_composition (coding, from, to, obj)
4376 struct coding_system *coding;
4377 int from, to;
4378 Lisp_Object obj;
4379 {
4380 Lisp_Object prop;
4381 int start, end;
4382
4383 coding->composing = COMPOSITION_DISABLED;
4384 if (!find_composition (from, to, &start, &end, &prop, obj)
4385 || end > to)
4386 return;
4387 if (start < from
4388 && (!find_composition (end, to, &start, &end, &prop, obj)
4389 || end > to))
4390 return;
4391 coding->composing = COMPOSITION_NO;
4392 coding_allocate_composition_data (coding, from);
4393 do
4394 {
4395 if (COMPOSITION_VALID_P (start, end, prop))
4396 {
4397 enum composition_method method = COMPOSITION_METHOD (prop);
4398 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
4399 >= COMPOSITION_DATA_SIZE)
4400 coding_allocate_composition_data (coding, from);
4401 /* For relative composition, we remember start and end
4402 positions, for the other compositions, we also remember
4403 components. */
4404 CODING_ADD_COMPOSITION_START (coding, start - from, method);
4405 if (method != COMPOSITION_RELATIVE)
4406 {
4407 /* We must also store the components of the composition. */
4408 Lisp_Object val, ch;
4409
4410 val = COMPOSITION_COMPONENTS (prop);
4411 if (CONSP (val))
4412 while (CONSP (val))
4413 {
4414 ch = XCAR (val), val = XCDR (val);
4415 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4416 }
4417 else if (VECTORP (val) || STRINGP (val))
4418 {
4419 int len = (VECTORP (val)
4420 ? XVECTOR (val)->size : XSTRING (val)->size);
4421 int i;
4422 for (i = 0; i < len; i++)
4423 {
4424 ch = (STRINGP (val)
4425 ? Faref (val, make_number (i))
4426 : XVECTOR (val)->contents[i]);
4427 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
4428 }
4429 }
4430 else /* INTEGERP (val) */
4431 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
4432 }
4433 CODING_ADD_COMPOSITION_END (coding, end - from);
4434 }
4435 start = end;
4436 }
4437 while (start < to
4438 && find_composition (start, to, &start, &end, &prop, obj)
4439 && end <= to);
4440
4441 /* Make coding->cmp_data point to the first memory block. */
4442 while (coding->cmp_data->prev)
4443 coding->cmp_data = coding->cmp_data->prev;
4444 coding->cmp_data_start = 0;
4445 }
4446
4447 /* Reflect the saved information about compositions to OBJ.
4448 CODING->cmp_data points to a memory block for the information. OBJ
4449 is a buffer or a string, defaults to the current buffer. */
4450
4451 static void
4452 coding_restore_composition (coding, obj)
4453 struct coding_system *coding;
4454 Lisp_Object obj;
4455 {
4456 struct composition_data *cmp_data = coding->cmp_data;
4457
4458 if (!cmp_data)
4459 return;
4460
4461 while (cmp_data->prev)
4462 cmp_data = cmp_data->prev;
4463
4464 while (cmp_data)
4465 {
4466 int i;
4467
4468 for (i = 0; i < cmp_data->used; i += cmp_data->data[i])
4469 {
4470 int *data = cmp_data->data + i;
4471 enum composition_method method = (enum composition_method) data[3];
4472 Lisp_Object components;
4473
4474 if (method == COMPOSITION_RELATIVE)
4475 components = Qnil;
4476 else
4477 {
4478 int len = data[0] - 4, j;
4479 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
4480
4481 for (j = 0; j < len; j++)
4482 args[j] = make_number (data[4 + j]);
4483 components = (method == COMPOSITION_WITH_ALTCHARS
4484 ? Fstring (len, args) : Fvector (len, args));
4485 }
4486 compose_text (data[1], data[2], components, Qnil, obj);
4487 }
4488 cmp_data = cmp_data->next;
4489 }
4490 }
4491
4249 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the 4492 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
4250 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by 4493 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
4251 coding system CODING, and return the status code of code conversion 4494 coding system CODING, and return the status code of code conversion
4252 (currently, this value has no meaning). 4495 (currently, this value has no meaning).
4253 4496
4327 encounter an inconsistent eol format while decoding. */ 4570 encounter an inconsistent eol format while decoding. */
4328 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; 4571 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4329 } 4572 }
4330 } 4573 }
4331 4574
4332 coding->consumed_char = len, coding->consumed = len_byte;
4333
4334 if (encodep 4575 if (encodep
4335 ? ! CODING_REQUIRE_ENCODING (coding) 4576 ? ! CODING_REQUIRE_ENCODING (coding)
4336 : ! CODING_REQUIRE_DECODING (coding)) 4577 : ! CODING_REQUIRE_DECODING (coding))
4337 { 4578 {
4579 coding->consumed_char = len;
4580 coding->consumed = len_byte;
4338 coding->produced = len_byte; 4581 coding->produced = len_byte;
4339 if (multibyte 4582 if (multibyte
4340 && ! replace 4583 && ! replace
4341 /* See the comment of the member heading_ascii in coding.h. */ 4584 /* See the comment of the member heading_ascii in coding.h. */
4342 && coding->heading_ascii < len_byte) 4585 && coding->heading_ascii < len_byte)
4405 } 4648 }
4406 4649
4407 if (replace) 4650 if (replace)
4408 deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1); 4651 deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4409 4652
4410 /* Try to skip the heading and tailing ASCIIs. */ 4653 if (coding->composing != COMPOSITION_DISABLED)
4411 { 4654 {
4412 int from_byte_orig = from_byte, to_byte_orig = to_byte; 4655 if (encodep)
4413 4656 coding_save_composition (coding, from, to, Fcurrent_buffer ());
4414 if (from < GPT && GPT < to) 4657 else
4415 move_gap_both (from, from_byte); 4658 coding_allocate_composition_data (coding, from);
4416 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); 4659 }
4417 if (from_byte == to_byte 4660
4418 && coding->type != coding_type_ccl 4661 /* For conversion by CCL program and for encoding with composition
4419 && ! (coding->mode & CODING_MODE_LAST_BLOCK 4662 handling, we can't skip any character because we may convert or
4420 && CODING_REQUIRE_FLUSHING (coding))) 4663 compose even ASCII characters. */
4421 { 4664 if (coding->type != coding_type_ccl
4422 coding->produced = len_byte; 4665 && (!encodep || coding->cmp_data == NULL))
4423 coding->produced_char = multibyte ? len : len_byte; 4666 {
4424 if (!replace) 4667 /* Try to skip the heading and tailing ASCIIs. */
4425 /* We must record and adjust for this new text now. */ 4668 int from_byte_orig = from_byte, to_byte_orig = to_byte;
4426 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); 4669
4427 return 0; 4670 if (from < GPT && GPT < to)
4428 } 4671 move_gap_both (from, from_byte);
4429 4672 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4430 head_skip = from_byte - from_byte_orig; 4673 if (from_byte == to_byte
4431 tail_skip = to_byte_orig - to_byte; 4674 && (encodep || NILP (coding->post_read_conversion))
4432 total_skip = head_skip + tail_skip; 4675 && ! CODING_REQUIRE_FLUSHING (coding))
4433 from += head_skip; 4676 {
4434 to -= tail_skip; 4677 coding->produced = len_byte;
4435 len -= total_skip; len_byte -= total_skip; 4678 coding->produced_char = multibyte ? len : len_byte;
4436 } 4679 if (!replace)
4680 /* We must record and adjust for this new text now. */
4681 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4682 return 0;
4683 }
4684
4685 head_skip = from_byte - from_byte_orig;
4686 tail_skip = to_byte_orig - to_byte;
4687 total_skip = head_skip + tail_skip;
4688 from += head_skip;
4689 to -= tail_skip;
4690 len -= total_skip; len_byte -= total_skip;
4691
4692 if (coding->cmp_data)
4693 coding->cmp_data->char_offset = from;
4694 }
4437 4695
4438 /* The code conversion routine can not preserve text properties for 4696 /* The code conversion routine can not preserve text properties for
4439 now. So, we must remove all text properties in the region. 4697 now. So, we must remove all text properties in the region.
4440 Here, we must suppress all modification hooks. */ 4698 Here, we must suppress all modification hooks. */
4441 if (replace) 4699 if (replace)
4456 if (GAP_SIZE < require) 4714 if (GAP_SIZE < require)
4457 make_gap (require - GAP_SIZE); 4715 make_gap (require - GAP_SIZE);
4458 move_gap_both (from, from_byte); 4716 move_gap_both (from, from_byte);
4459 4717
4460 inserted = inserted_byte = 0; 4718 inserted = inserted_byte = 0;
4461 src = GAP_END_ADDR, dst = GPT_ADDR;
4462 4719
4463 GAP_SIZE += len_byte; 4720 GAP_SIZE += len_byte;
4464 ZV -= len; 4721 ZV -= len;
4465 Z -= len; 4722 Z -= len;
4466 ZV_BYTE -= len_byte; 4723 ZV_BYTE -= len_byte;
4473 4730
4474 for (;;) 4731 for (;;)
4475 { 4732 {
4476 int result; 4733 int result;
4477 4734
4478 /* The buffer memory is changed from: 4735 /* The buffer memory is now:
4479 +--------+converted-text+---------+-------original-text------+---+ 4736 +--------+converted-text+---------+-------original-text------+---+
4480 |<-from->|<--inserted-->|---------|<-----------len---------->|---| 4737 |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4481 |<------------------- GAP_SIZE -------------------->| */ 4738 |<------------------- GAP_SIZE -------------------->| */
4739 src = GAP_END_ADDR - len_byte;
4740 dst = GPT_ADDR + inserted_byte;
4741
4482 if (encodep) 4742 if (encodep)
4483 result = encode_coding (coding, src, dst, len_byte, 0); 4743 result = encode_coding (coding, src, dst, len_byte, 0);
4484 else 4744 else
4485 result = decode_coding (coding, src, dst, len_byte, 0); 4745 result = decode_coding (coding, src, dst, len_byte, 0);
4486 /* to: 4746
4747 /* The buffer memory is now:
4487 +--------+-------converted-text--------+--+---original-text--+---+ 4748 +--------+-------converted-text--------+--+---original-text--+---+
4488 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| 4749 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4489 |<------------------- GAP_SIZE -------------------->| */ 4750 |<------------------- GAP_SIZE -------------------->| */
4751
4490 if (coding->fake_multibyte) 4752 if (coding->fake_multibyte)
4491 fake_multibyte = 1; 4753 fake_multibyte = 1;
4492 4754
4493 if (!encodep && !multibyte) 4755 if (!encodep && !multibyte)
4494 coding->produced_char = coding->produced; 4756 coding->produced_char = coding->produced;
4495 inserted += coding->produced_char; 4757 inserted += coding->produced_char;
4496 inserted_byte += coding->produced; 4758 inserted_byte += coding->produced;
4497 len_byte -= coding->consumed; 4759 len_byte -= coding->consumed;
4760
4761 if (result == CODING_FINISH_INSUFFICIENT_CMP)
4762 {
4763 coding_allocate_composition_data (coding, from + inserted);
4764 continue;
4765 }
4766
4498 src += coding->consumed; 4767 src += coding->consumed;
4499 dst += coding->produced; 4768 dst += coding->produced;
4500 4769
4501 if (result == CODING_FINISH_NORMAL) 4770 if (result == CODING_FINISH_NORMAL)
4502 { 4771 {
4624 GPT += inserted_byte; GPT_BYTE += inserted_byte; 4893 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4625 make_gap (require + 2000); 4894 make_gap (require + 2000);
4626 GAP_SIZE += add; 4895 GAP_SIZE += add;
4627 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; 4896 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4628 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; 4897 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4629 /* Don't forget to update SRC, DST. */
4630 src = GAP_END_ADDR - len_byte;
4631 dst = GPT_ADDR + inserted_byte;
4632 } 4898 }
4633 } 4899 }
4634 if (src - dst > 0) *dst = 0; /* Put an anchor. */ 4900 if (src - dst > 0) *dst = 0; /* Put an anchor. */
4635 4901
4636 if (multibyte 4902 if (multibyte
4654 } 4920 }
4655 4921
4656 prev_Z = Z; 4922 prev_Z = Z;
4657 adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte); 4923 adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4658 inserted = Z - prev_Z; 4924 inserted = Z - prev_Z;
4925
4926 if (!encodep && coding->cmp_data && coding->cmp_data->used)
4927 coding_restore_composition (coding, Fcurrent_buffer ());
4928 coding_free_composition_data (coding);
4659 4929
4660 if (! encodep && ! NILP (coding->post_read_conversion)) 4930 if (! encodep && ! NILP (coding->post_read_conversion))
4661 { 4931 {
4662 Lisp_Object val; 4932 Lisp_Object val;
4663 int count = specpdl_ptr - specpdl; 4933 int count = specpdl_ptr - specpdl;
4684 else 4954 else
4685 orig_point = from; 4955 orig_point = from;
4686 TEMP_SET_PT (orig_point); 4956 TEMP_SET_PT (orig_point);
4687 } 4957 }
4688 4958
4689 signal_after_change (from, to - from, inserted); 4959 if (replace)
4960 {
4961 signal_after_change (from, to - from, inserted);
4962 update_compositions (from, to, CHECK_BORDER);
4963 }
4690 4964
4691 { 4965 {
4692 coding->consumed = to_byte - from_byte; 4966 coding->consumed = to_byte - from_byte;
4693 coding->consumed_char = to - from; 4967 coding->consumed_char = to - from;
4694 coding->produced = inserted_byte; 4968 coding->produced = inserted_byte;
4766 } 5040 }
4767 5041
4768 if (encodep 5042 if (encodep
4769 ? ! CODING_REQUIRE_ENCODING (coding) 5043 ? ! CODING_REQUIRE_ENCODING (coding)
4770 : ! CODING_REQUIRE_DECODING (coding)) 5044 : ! CODING_REQUIRE_DECODING (coding))
4771 from = to_byte; 5045 return (nocopy ? str : Fcopy_sequence (str));
4772 else 5046
5047 if (coding->composing != COMPOSITION_DISABLED)
5048 {
5049 if (encodep)
5050 coding_save_composition (coding, from, to, str);
5051 else
5052 coding_allocate_composition_data (coding, from);
5053 }
5054
5055 /* For conversion by CCL program and for encoding with composition
5056 handling, we can't skip any character because we may convert or
5057 compose even ASCII characters. */
5058 if (coding->type != coding_type_ccl
5059 && (!encodep || coding->cmp_data == NULL))
4773 { 5060 {
4774 /* Try to skip the heading and tailing ASCIIs. */ 5061 /* Try to skip the heading and tailing ASCIIs. */
5062 int from_orig = from;
5063
4775 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, 5064 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
4776 encodep); 5065 encodep);
4777 } 5066 if (from == to_byte)
4778 if (from == to_byte 5067 return (nocopy ? str : Fcopy_sequence (str));
4779 && coding->type != coding_type_ccl) 5068
4780 return (nocopy ? str : Fcopy_sequence (str)); 5069 if (coding->cmp_data)
5070 coding->cmp_data->char_offset = from;
5071 }
4781 5072
4782 if (encodep) 5073 if (encodep)
4783 len = encoding_buffer_size (coding, to_byte - from); 5074 len = encoding_buffer_size (coding, to_byte - from);
4784 else 5075 else
4785 len = decoding_buffer_size (coding, to_byte - from); 5076 len = decoding_buffer_size (coding, to_byte - from);
4795 buf + from, to_byte - from, len) 5086 buf + from, to_byte - from, len)
4796 : decode_coding (coding, XSTRING (str)->data + from, 5087 : decode_coding (coding, XSTRING (str)->data + from,
4797 buf + from, to_byte - from, len)); 5088 buf + from, to_byte - from, len));
4798 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) 5089 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4799 { 5090 {
4800 /* We simple try to decode the whole string again but without 5091 /* We simply try to decode the whole string again but without
4801 eol-conversion this time. */ 5092 eol-conversion this time. */
4802 coding->eol_type = CODING_EOL_LF; 5093 coding->eol_type = CODING_EOL_LF;
4803 coding->symbol = saved_coding_symbol; 5094 coding->symbol = saved_coding_symbol;
5095 coding_free_composition_data (coding);
4804 return code_convert_string (str, coding, encodep, nocopy); 5096 return code_convert_string (str, coding, encodep, nocopy);
4805 } 5097 }
4806 5098
4807 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, 5099 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4808 STRING_BYTES (XSTRING (str)) - to_byte); 5100 STRING_BYTES (XSTRING (str)) - to_byte);
4816 ? multibyte_chars_in_text (buf + from, coding->produced) 5108 ? multibyte_chars_in_text (buf + from, coding->produced)
4817 : coding->produced_char); 5109 : coding->produced_char);
4818 str = make_multibyte_string (buf, len + chars, len + coding->produced); 5110 str = make_multibyte_string (buf, len + chars, len + coding->produced);
4819 } 5111 }
4820 5112
5113 if (!encodep && coding->cmp_data && coding->cmp_data->used)
5114 coding_restore_composition (coding, str);
5115
5116 coding_free_composition_data (coding);
4821 return str; 5117 return str;
4822 } 5118 }
4823 5119
4824 5120
4825 #ifdef emacs 5121 #ifdef emacs
5075 5371
5076 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 5372 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5077 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); 5373 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5078 5374
5079 coding.mode |= CODING_MODE_LAST_BLOCK; 5375 coding.mode |= CODING_MODE_LAST_BLOCK;
5376 string = code_convert_string (string, &coding, encodep, !NILP (nocopy));
5080 Vlast_coding_system_used = coding.symbol; 5377 Vlast_coding_system_used = coding.symbol;
5081 return code_convert_string (string, &coding, encodep, !NILP (nocopy)); 5378
5379 return string;
5082 } 5380 }
5083 5381
5084 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, 5382 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
5085 2, 3, 0, 5383 2, 3, 0,
5086 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\ 5384 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
5108 { 5406 {
5109 return code_convert_string1 (string, coding_system, nocopy, 1); 5407 return code_convert_string1 (string, coding_system, nocopy, 1);
5110 } 5408 }
5111 5409
5112 /* Encode or decode STRING according to CODING_SYSTEM. 5410 /* Encode or decode STRING according to CODING_SYSTEM.
5113 Do not set Vlast_coding_system_used. */ 5411 Do not set Vlast_coding_system_used.
5412
5413 This function is called only from macros DECODE_FILE and
5414 ENCODE_FILE, thus we ignore character composition. */
5114 5415
5115 Lisp_Object 5416 Lisp_Object
5116 code_convert_string_norecord (string, coding_system, encodep) 5417 code_convert_string_norecord (string, coding_system, encodep)
5117 Lisp_Object string, coding_system; 5418 Lisp_Object string, coding_system;
5118 int encodep; 5419 int encodep;
5126 return string; 5427 return string;
5127 5428
5128 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 5429 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5129 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); 5430 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5130 5431
5432 coding.composing = COMPOSITION_DISABLED;
5131 coding.mode |= CODING_MODE_LAST_BLOCK; 5433 coding.mode |= CODING_MODE_LAST_BLOCK;
5132 return code_convert_string (string, &coding, encodep, Qt); 5434 return code_convert_string (string, &coding, encodep, Qt);
5133 } 5435 }
5134 5436
5135 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, 5437 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
5260 { 5562 {
5261 CHECK_SYMBOL (coding_system, 0); 5563 CHECK_SYMBOL (coding_system, 0);
5262 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); 5564 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
5263 /* We had better not send unsafe characters to terminal. */ 5565 /* We had better not send unsafe characters to terminal. */
5264 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; 5566 terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
5265 5567 /* Character composition should be disabled. */
5568 terminal_coding.composing = COMPOSITION_DISABLED;
5266 return Qnil; 5569 return Qnil;
5267 } 5570 }
5268 5571
5269 DEFUN ("set-safe-terminal-coding-system-internal", 5572 DEFUN ("set-safe-terminal-coding-system-internal",
5270 Fset_safe_terminal_coding_system_internal, 5573 Fset_safe_terminal_coding_system_internal,
5273 Lisp_Object coding_system; 5576 Lisp_Object coding_system;
5274 { 5577 {
5275 CHECK_SYMBOL (coding_system, 0); 5578 CHECK_SYMBOL (coding_system, 0);
5276 setup_coding_system (Fcheck_coding_system (coding_system), 5579 setup_coding_system (Fcheck_coding_system (coding_system),
5277 &safe_terminal_coding); 5580 &safe_terminal_coding);
5581 /* Character composition should be disabled. */
5582 safe_terminal_coding.composing = COMPOSITION_DISABLED;
5278 return Qnil; 5583 return Qnil;
5279 } 5584 }
5280 5585
5281 DEFUN ("terminal-coding-system", 5586 DEFUN ("terminal-coding-system",
5282 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0, 5587 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
5292 (coding_system) 5597 (coding_system)
5293 Lisp_Object coding_system; 5598 Lisp_Object coding_system;
5294 { 5599 {
5295 CHECK_SYMBOL (coding_system, 0); 5600 CHECK_SYMBOL (coding_system, 0);
5296 setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding); 5601 setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
5602 /* Character composition should be disabled. */
5603 keyboard_coding.composing = COMPOSITION_DISABLED;
5297 return Qnil; 5604 return Qnil;
5298 } 5605 }
5299 5606
5300 DEFUN ("keyboard-coding-system", 5607 DEFUN ("keyboard-coding-system",
5301 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0, 5608 Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
5487 emacs_code_class[0x0A] = EMACS_linefeed_code; 5794 emacs_code_class[0x0A] = EMACS_linefeed_code;
5488 emacs_code_class[0x0D] = EMACS_carriage_return_code; 5795 emacs_code_class[0x0D] = EMACS_carriage_return_code;
5489 for (i = 0x21 ; i < 0x7F; i++) 5796 for (i = 0x21 ; i < 0x7F; i++)
5490 emacs_code_class[i] = EMACS_ascii_code; 5797 emacs_code_class[i] = EMACS_ascii_code;
5491 emacs_code_class[0x7F] = EMACS_control_code; 5798 emacs_code_class[0x7F] = EMACS_control_code;
5492 emacs_code_class[0x80] = EMACS_leading_code_composition; 5799 for (i = 0x80; i < 0xFF; i++)
5493 for (i = 0x81; i < 0xFF; i++)
5494 emacs_code_class[i] = EMACS_invalid_code; 5800 emacs_code_class[i] = EMACS_invalid_code;
5495 emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3; 5801 emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5496 emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3; 5802 emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5497 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4; 5803 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5498 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4; 5804 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;