Mercurial > emacs
comparison src/coding.c @ 92992:49c4ea77b83a
(CHAR_STRING_ADVANCE_NO_UNIFY)
(STRING_CHAR_ADVANCE_NO_UNIFY): New macros.
(coding_alloc_by_making_gap): Fix the way data in the gap is
preserved.
(alloc_destination): Fix the 2nd arg to
coding_alloc_by_making_gap.
(encode_coding_utf_8): Use CHAR_STRING_ADVANCE_NO_UNIFY instead of
CHAR_STRING_ADVANCE.
(produce_chars): Fix the case where the source and the
destination are the same buffer. Use CHAR_STRING_ADVANCE_NO_UNIFY
instead of CHAR_STRING_ADVANCE.
(consume_chars): Use STRING_CHAR_ADVANCE_NO_UNIFY instead of
STRING_CHAR_ADVANCE.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Sun, 16 Mar 2008 01:24:55 +0000 |
| parents | 5f5f07a5c076 |
| children | 212fa666680e |
comparison
equal
deleted
inserted
replaced
| 92991:24a6717aed7f | 92992:49c4ea77b83a |
|---|---|
| 970 dst_end = coding->destination + coding->dst_bytes; \ | 970 dst_end = coding->destination + coding->dst_bytes; \ |
| 971 } \ | 971 } \ |
| 972 } while (0) | 972 } while (0) |
| 973 | 973 |
| 974 | 974 |
| 975 /* Store multibyte form of the character C in P, and advance P to the | |
| 976 end of the multibyte form. This is like CHAR_STRING_ADVANCE but it | |
| 977 never calls MAYBE_UNIFY_CHAR. */ | |
| 978 | |
| 979 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \ | |
| 980 do { \ | |
| 981 if ((c) <= MAX_1_BYTE_CHAR) \ | |
| 982 *(p)++ = (c); \ | |
| 983 else if ((c) <= MAX_2_BYTE_CHAR) \ | |
| 984 *(p)++ = (0xC0 | ((c) >> 6)), \ | |
| 985 *(p)++ = (0x80 | ((c) & 0x3F)); \ | |
| 986 else if ((c) <= MAX_3_BYTE_CHAR) \ | |
| 987 *(p)++ = (0xE0 | ((c) >> 12)), \ | |
| 988 *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \ | |
| 989 *(p)++ = (0x80 | ((c) & 0x3F)); \ | |
| 990 else if ((c) <= MAX_4_BYTE_CHAR) \ | |
| 991 *(p)++ = (0xF0 | (c >> 18)), \ | |
| 992 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | |
| 993 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | |
| 994 *(p)++ = (0x80 | (c & 0x3F)); \ | |
| 995 else if ((c) <= MAX_5_BYTE_CHAR) \ | |
| 996 *(p)++ = 0xF8, \ | |
| 997 *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \ | |
| 998 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | |
| 999 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | |
| 1000 *(p)++ = (0x80 | (c & 0x3F)); \ | |
| 1001 else \ | |
| 1002 (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \ | |
| 1003 } while (0) | |
| 1004 | |
| 1005 | |
| 1006 /* Return the character code of the character whose multibyte form is at | |
| 1007 P, and advance P to the end of the multibyte form. This is like | |
| 1008 STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */ | |
| 1009 | |
| 1010 #define STRING_CHAR_ADVANCE_NO_UNIFY(p) \ | |
| 1011 (!((p)[0] & 0x80) \ | |
| 1012 ? *(p)++ \ | |
| 1013 : ! ((p)[0] & 0x20) \ | |
| 1014 ? ((p) += 2, \ | |
| 1015 ((((p)[-2] & 0x1F) << 6) \ | |
| 1016 | ((p)[-1] & 0x3F) \ | |
| 1017 | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \ | |
| 1018 : ! ((p)[0] & 0x10) \ | |
| 1019 ? ((p) += 3, \ | |
| 1020 ((((p)[-3] & 0x0F) << 12) \ | |
| 1021 | (((p)[-2] & 0x3F) << 6) \ | |
| 1022 | ((p)[-1] & 0x3F))) \ | |
| 1023 : ! ((p)[0] & 0x08) \ | |
| 1024 ? ((p) += 4, \ | |
| 1025 ((((p)[-4] & 0xF) << 18) \ | |
| 1026 | (((p)[-3] & 0x3F) << 12) \ | |
| 1027 | (((p)[-2] & 0x3F) << 6) \ | |
| 1028 | ((p)[-1] & 0x3F))) \ | |
| 1029 : ((p) += 5, \ | |
| 1030 ((((p)[-4] & 0x3F) << 18) \ | |
| 1031 | (((p)[-3] & 0x3F) << 12) \ | |
| 1032 | (((p)[-2] & 0x3F) << 6) \ | |
| 1033 | ((p)[-1] & 0x3F)))) | |
| 1034 | |
| 975 | 1035 |
| 976 static void | 1036 static void |
| 977 coding_set_source (coding) | 1037 coding_set_source (coding) |
| 978 struct coding_system *coding; | 1038 struct coding_system *coding; |
| 979 { | 1039 { |
| 1035 coding->dst_bytes + bytes); | 1095 coding->dst_bytes + bytes); |
| 1036 coding->dst_bytes += bytes; | 1096 coding->dst_bytes += bytes; |
| 1037 } | 1097 } |
| 1038 | 1098 |
| 1039 static void | 1099 static void |
| 1040 coding_alloc_by_making_gap (coding, offset, bytes) | 1100 coding_alloc_by_making_gap (coding, gap_head_used, bytes) |
| 1041 struct coding_system *coding; | 1101 struct coding_system *coding; |
| 1042 EMACS_INT offset, bytes; | 1102 EMACS_INT gap_head_used, bytes; |
| 1043 { | 1103 { |
| 1044 if (BUFFERP (coding->dst_object) | 1104 if (EQ (coding->src_object, coding->dst_object)) |
| 1045 && EQ (coding->src_object, coding->dst_object)) | 1105 { |
| 1046 { | 1106 /* The gap may contain the produced data at the head and not-yet |
| 1047 EMACS_INT add = offset + (coding->src_bytes - coding->consumed); | 1107 consumed data at the tail. To preserve that data, we first |
| 1048 | 1108 set the gap size to zero, then increase the gap |
| 1049 GPT += offset, GPT_BYTE += offset; | 1109 size. */ |
| 1050 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | 1110 EMACS_INT add = GAP_SIZE; |
| 1111 | |
| 1112 GPT += gap_head_used, GPT_BYTE += gap_head_used; | |
| 1113 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | |
| 1051 make_gap (bytes); | 1114 make_gap (bytes); |
| 1052 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | 1115 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; |
| 1053 GPT -= offset, GPT_BYTE -= offset; | 1116 GPT -= gap_head_used, GPT_BYTE -= gap_head_used; |
| 1054 } | 1117 } |
| 1055 else | 1118 else |
| 1056 { | 1119 { |
| 1057 Lisp_Object this_buffer; | 1120 Lisp_Object this_buffer; |
| 1058 | 1121 |
| 1071 unsigned char *dst; | 1134 unsigned char *dst; |
| 1072 { | 1135 { |
| 1073 EMACS_INT offset = dst - coding->destination; | 1136 EMACS_INT offset = dst - coding->destination; |
| 1074 | 1137 |
| 1075 if (BUFFERP (coding->dst_object)) | 1138 if (BUFFERP (coding->dst_object)) |
| 1076 coding_alloc_by_making_gap (coding, offset, nbytes); | 1139 { |
| 1140 struct buffer *buf = XBUFFER (coding->dst_object); | |
| 1141 | |
| 1142 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes); | |
| 1143 } | |
| 1077 else | 1144 else |
| 1078 coding_alloc_by_realloc (coding, nbytes); | 1145 coding_alloc_by_realloc (coding, nbytes); |
| 1079 record_conversion_result (coding, CODING_RESULT_SUCCESS); | 1146 record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 1080 coding_set_destination (coding); | 1147 coding_set_destination (coding); |
| 1081 dst = coding->destination + offset; | 1148 dst = coding->destination + offset; |
| 1363 c = CHAR_TO_BYTE8 (c); | 1430 c = CHAR_TO_BYTE8 (c); |
| 1364 EMIT_ONE_BYTE (c); | 1431 EMIT_ONE_BYTE (c); |
| 1365 } | 1432 } |
| 1366 else | 1433 else |
| 1367 { | 1434 { |
| 1368 CHAR_STRING_ADVANCE (c, pend); | 1435 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend); |
| 1369 for (p = str; p < pend; p++) | 1436 for (p = str; p < pend; p++) |
| 1370 EMIT_ONE_BYTE (*p); | 1437 EMIT_ONE_BYTE (*p); |
| 1371 } | 1438 } |
| 1372 } | 1439 } |
| 1373 } | 1440 } |
| 1380 ASSURE_DESTINATION (safe_room); | 1447 ASSURE_DESTINATION (safe_room); |
| 1381 c = *charbuf++; | 1448 c = *charbuf++; |
| 1382 if (CHAR_BYTE8_P (c)) | 1449 if (CHAR_BYTE8_P (c)) |
| 1383 *dst++ = CHAR_TO_BYTE8 (c); | 1450 *dst++ = CHAR_TO_BYTE8 (c); |
| 1384 else | 1451 else |
| 1385 dst += CHAR_STRING (c, dst); | 1452 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); |
| 1386 produced_chars++; | 1453 produced_chars++; |
| 1387 } | 1454 } |
| 1388 } | 1455 } |
| 1389 record_conversion_result (coding, CODING_RESULT_SUCCESS); | 1456 record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 1390 coding->produced_char += produced_chars; | 1457 coding->produced_char += produced_chars; |
| 5969 { | 6036 { |
| 5970 /* Source characters are in coding->charbuf. */ | 6037 /* Source characters are in coding->charbuf. */ |
| 5971 int *buf = coding->charbuf; | 6038 int *buf = coding->charbuf; |
| 5972 int *buf_end = buf + coding->charbuf_used; | 6039 int *buf_end = buf + coding->charbuf_used; |
| 5973 | 6040 |
| 5974 if (BUFFERP (coding->src_object) | 6041 if (EQ (coding->src_object, coding->dst_object)) |
| 5975 && EQ (coding->src_object, coding->dst_object)) | 6042 { |
| 5976 dst_end = ((unsigned char *) coding->source) + coding->consumed; | 6043 coding_set_source (coding); |
| 6044 dst_end = ((unsigned char *) coding->source) + coding->consumed; | |
| 6045 } | |
| 5977 | 6046 |
| 5978 while (buf < buf_end) | 6047 while (buf < buf_end) |
| 5979 { | 6048 { |
| 5980 int c = *buf, i; | 6049 int c = *buf, i; |
| 5981 | 6050 |
| 5998 { | 6067 { |
| 5999 dst = alloc_destination (coding, | 6068 dst = alloc_destination (coding, |
| 6000 buf_end - buf | 6069 buf_end - buf |
| 6001 + MAX_MULTIBYTE_LENGTH * to_nchars, | 6070 + MAX_MULTIBYTE_LENGTH * to_nchars, |
| 6002 dst); | 6071 dst); |
| 6003 dst_end = coding->destination + coding->dst_bytes; | 6072 if (EQ (coding->src_object, coding->dst_object)) |
| 6073 { | |
| 6074 coding_set_source (coding); | |
| 6075 dst_end = ((unsigned char *) coding->source) + coding->consumed; | |
| 6076 } | |
| 6077 else | |
| 6078 dst_end = coding->destination + coding->dst_bytes; | |
| 6004 } | 6079 } |
| 6005 | 6080 |
| 6006 for (i = 0; i < to_nchars; i++) | 6081 for (i = 0; i < to_nchars; i++) |
| 6007 { | 6082 { |
| 6008 if (i > 0) | 6083 if (i > 0) |
| 6009 c = XINT (AREF (trans, i)); | 6084 c = XINT (AREF (trans, i)); |
| 6010 if (coding->dst_multibyte | 6085 if (coding->dst_multibyte |
| 6011 || ! CHAR_BYTE8_P (c)) | 6086 || ! CHAR_BYTE8_P (c)) |
| 6012 CHAR_STRING_ADVANCE (c, dst); | 6087 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); |
| 6013 else | 6088 else |
| 6014 *dst++ = CHAR_TO_BYTE8 (c); | 6089 *dst++ = CHAR_TO_BYTE8 (c); |
| 6015 } | 6090 } |
| 6016 produced_chars += to_nchars; | 6091 produced_chars += to_nchars; |
| 6017 *buf++ = to_nchars; | 6092 *buf++ = to_nchars; |
| 6028 { | 6103 { |
| 6029 /* Source characters are at coding->source. */ | 6104 /* Source characters are at coding->source. */ |
| 6030 const unsigned char *src = coding->source; | 6105 const unsigned char *src = coding->source; |
| 6031 const unsigned char *src_end = src + coding->consumed; | 6106 const unsigned char *src_end = src + coding->consumed; |
| 6032 | 6107 |
| 6108 if (EQ (coding->dst_object, coding->src_object)) | |
| 6109 dst_end = (unsigned char *) src; | |
| 6033 if (coding->src_multibyte != coding->dst_multibyte) | 6110 if (coding->src_multibyte != coding->dst_multibyte) |
| 6034 { | 6111 { |
| 6035 if (coding->src_multibyte) | 6112 if (coding->src_multibyte) |
| 6036 { | 6113 { |
| 6037 int multibytep = 1; | 6114 int multibytep = 1; |
| 6055 dst); | 6132 dst); |
| 6056 dst_end = coding->destination + coding->dst_bytes; | 6133 dst_end = coding->destination + coding->dst_bytes; |
| 6057 coding_set_source (coding); | 6134 coding_set_source (coding); |
| 6058 src = coding->source + offset; | 6135 src = coding->source + offset; |
| 6059 src_end = coding->source + coding->src_bytes; | 6136 src_end = coding->source + coding->src_bytes; |
| 6137 if (EQ (coding->src_object, coding->dst_object)) | |
| 6138 dst_end = (unsigned char *) src; | |
| 6060 } | 6139 } |
| 6061 } | 6140 } |
| 6062 *dst++ = c; | 6141 *dst++ = c; |
| 6063 produced_chars++; | 6142 produced_chars++; |
| 6064 } | 6143 } |
| 6076 if (EQ (coding->src_object, coding->dst_object)) | 6155 if (EQ (coding->src_object, coding->dst_object)) |
| 6077 dst_end = (unsigned char *) src; | 6156 dst_end = (unsigned char *) src; |
| 6078 if (dst >= dst_end - 1) | 6157 if (dst >= dst_end - 1) |
| 6079 { | 6158 { |
| 6080 EMACS_INT offset = src - coding->source; | 6159 EMACS_INT offset = src - coding->source; |
| 6081 | 6160 EMACS_INT more_bytes; |
| 6082 dst = alloc_destination (coding, src_end - src + 2, | 6161 |
| 6083 dst); | 6162 if (EQ (coding->src_object, coding->dst_object)) |
| 6163 more_bytes = ((src_end - src) / 2) + 2; | |
| 6164 else | |
| 6165 more_bytes = src_end - src + 2; | |
| 6166 dst = alloc_destination (coding, more_bytes, dst); | |
| 6084 dst_end = coding->destination + coding->dst_bytes; | 6167 dst_end = coding->destination + coding->dst_bytes; |
| 6085 coding_set_source (coding); | 6168 coding_set_source (coding); |
| 6086 src = coding->source + offset; | 6169 src = coding->source + offset; |
| 6087 src_end = coding->source + coding->src_bytes; | 6170 src_end = coding->source + coding->src_bytes; |
| 6171 if (EQ (coding->src_object, coding->dst_object)) | |
| 6172 dst_end = (unsigned char *) src; | |
| 6088 } | 6173 } |
| 6089 } | 6174 } |
| 6090 EMIT_ONE_BYTE (c); | 6175 EMIT_ONE_BYTE (c); |
| 6091 } | 6176 } |
| 6092 } | 6177 } |
| 6570 EMACS_INT bytes; | 6655 EMACS_INT bytes; |
| 6571 | 6656 |
| 6572 if (coding->encoder == encode_coding_raw_text) | 6657 if (coding->encoder == encode_coding_raw_text) |
| 6573 c = *src++, pos++; | 6658 c = *src++, pos++; |
| 6574 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) | 6659 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) |
| 6575 c = STRING_CHAR_ADVANCE (src), pos += bytes; | 6660 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes; |
| 6576 else | 6661 else |
| 6577 c = BYTE8_TO_CHAR (*src), src++, pos++; | 6662 c = BYTE8_TO_CHAR (*src), src++, pos++; |
| 6578 } | 6663 } |
| 6579 else | 6664 else |
| 6580 c = STRING_CHAR_ADVANCE (src), pos++; | 6665 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++; |
| 6581 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) | 6666 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) |
| 6582 c = '\n'; | 6667 c = '\n'; |
| 6583 if (! EQ (eol_type, Qunix)) | 6668 if (! EQ (eol_type, Qunix)) |
| 6584 { | 6669 { |
| 6585 if (c == '\n') | 6670 if (c == '\n') |
