comparison src/coding.c @ 92992:49c4ea77b83a

(CHAR_STRING_ADVANCE_NO_UNIFY) (STRING_CHAR_ADVANCE_NO_UNIFY): New macros. (coding_alloc_by_making_gap): Fix the way to preserve data in the gap. (alloc_destination): Fix the 2nd arg to coding_alloc_by_making_gap. (encode_coding_utf_8): Use CHAR_STRING_ADVANCE_NO_UNIFY instead of CHAR_STRING_ADVANCE. (produce_chars): Fix for the case that the source and the destination are the same buffer. Use CHAR_STRING_ADVANCE_NO_UNIFY instead of CHAR_STRING_ADVANCE. (consume_chars): Use STRING_CHAR_ADVANCE_NO_UNIFY instead of STRING_CHAR_ADVANCE.
author Kenichi Handa <handa@m17n.org>
date Sun, 16 Mar 2008 01:24:55 +0000
parents 5f5f07a5c076
children 212fa666680e
comparison
equal deleted inserted replaced
92991:24a6717aed7f 92992:49c4ea77b83a
970 dst_end = coding->destination + coding->dst_bytes; \ 970 dst_end = coding->destination + coding->dst_bytes; \
971 } \ 971 } \
972 } while (0) 972 } while (0)
973 973
974 974
/* Store the multibyte form of the character C at P, and advance P to
   the end of that multibyte form.  This is like CHAR_STRING_ADVANCE
   except that it never calls MAYBE_UNIFY_CHAR.

   C is evaluated several times, so it must not have side effects; it
   is parenthesized at every use so that an arbitrary expression may
   be passed.  P must be a modifiable pointer lvalue; the longest
   branch written out here stores five bytes through it.

   The MAX_n_BYTE_CHAR comparisons pick the sequence length.  A
   character above MAX_5_BYTE_CHAR is handed to BYTE8_STRING after
   subtracting 0x3FFF80, and P is advanced by the value BYTE8_STRING
   yields.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, (p));        \
  } while (0)
1004
1005
/* Yield the character code of the character whose multibyte form
   starts at P, and advance P to the end of that multibyte form.  This
   is like STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   P is evaluated several times and is modified, so it must be a
   pointer lvalue without side effects.

   The length is chosen from the first byte: bit 0x80 clear means one
   byte; otherwise the first clear bit among 0x20, 0x10 and 0x08
   selects two, three or four bytes, and five bytes are consumed when
   all of them are set.  A two-byte form whose first byte is below
   0xC2 additionally has 0x3FFF80 OR'ed into the result.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                 \
  (((p)[0] & 0x80) == 0                                                 \
   ? *(p)++                                                             \
   : (((p)[0] & 0x20) == 0                                              \
      ? ((p) += 2,                                                      \
         (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)             \
          | (((p)[-2] & 0x1F) << 6)                                     \
          | ((p)[-1] & 0x3F)))                                          \
      : (((p)[0] & 0x10) == 0                                           \
         ? ((p) += 3,                                                   \
            ((((p)[-3] & 0x0F) << 12)                                   \
             | (((p)[-2] & 0x3F) << 6)                                  \
             | ((p)[-1] & 0x3F)))                                       \
         : (((p)[0] & 0x08) == 0                                        \
            ? ((p) += 4,                                                \
               ((((p)[-4] & 0xF) << 18)                                 \
                | (((p)[-3] & 0x3F) << 12)                              \
                | (((p)[-2] & 0x3F) << 6)                               \
                | ((p)[-1] & 0x3F)))                                    \
            : ((p) += 5,                                                \
               ((((p)[-4] & 0x3F) << 18)                                \
                | (((p)[-3] & 0x3F) << 12)                              \
                | (((p)[-2] & 0x3F) << 6)                               \
                | ((p)[-1] & 0x3F)))))))
1034
975 1035
976 static void 1036 static void
977 coding_set_source (coding) 1037 coding_set_source (coding)
978 struct coding_system *coding; 1038 struct coding_system *coding;
979 { 1039 {
1035 coding->dst_bytes + bytes); 1095 coding->dst_bytes + bytes);
1036 coding->dst_bytes += bytes; 1096 coding->dst_bytes += bytes;
1037 } 1097 }
1038 1098
1039 static void 1099 static void
1040 coding_alloc_by_making_gap (coding, offset, bytes) 1100 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1041 struct coding_system *coding; 1101 struct coding_system *coding;
1042 EMACS_INT offset, bytes; 1102 EMACS_INT gap_head_used, bytes;
1043 { 1103 {
1044 if (BUFFERP (coding->dst_object) 1104 if (EQ (coding->src_object, coding->dst_object))
1045 && EQ (coding->src_object, coding->dst_object)) 1105 {
1046 { 1106 /* The gap may contain the produced data at the head and not-yet
1047 EMACS_INT add = offset + (coding->src_bytes - coding->consumed); 1107 consumed data at the tail. To preserve those data, we at
1048 1108 first make the gap size to zero, then increase the gap
1049 GPT += offset, GPT_BYTE += offset; 1109 size. */
1050 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; 1110 EMACS_INT add = GAP_SIZE;
1111
1112 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1113 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1051 make_gap (bytes); 1114 make_gap (bytes);
1052 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; 1115 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1053 GPT -= offset, GPT_BYTE -= offset; 1116 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1054 } 1117 }
1055 else 1118 else
1056 { 1119 {
1057 Lisp_Object this_buffer; 1120 Lisp_Object this_buffer;
1058 1121
1071 unsigned char *dst; 1134 unsigned char *dst;
1072 { 1135 {
1073 EMACS_INT offset = dst - coding->destination; 1136 EMACS_INT offset = dst - coding->destination;
1074 1137
1075 if (BUFFERP (coding->dst_object)) 1138 if (BUFFERP (coding->dst_object))
1076 coding_alloc_by_making_gap (coding, offset, nbytes); 1139 {
1140 struct buffer *buf = XBUFFER (coding->dst_object);
1141
1142 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1143 }
1077 else 1144 else
1078 coding_alloc_by_realloc (coding, nbytes); 1145 coding_alloc_by_realloc (coding, nbytes);
1079 record_conversion_result (coding, CODING_RESULT_SUCCESS); 1146 record_conversion_result (coding, CODING_RESULT_SUCCESS);
1080 coding_set_destination (coding); 1147 coding_set_destination (coding);
1081 dst = coding->destination + offset; 1148 dst = coding->destination + offset;
1363 c = CHAR_TO_BYTE8 (c); 1430 c = CHAR_TO_BYTE8 (c);
1364 EMIT_ONE_BYTE (c); 1431 EMIT_ONE_BYTE (c);
1365 } 1432 }
1366 else 1433 else
1367 { 1434 {
1368 CHAR_STRING_ADVANCE (c, pend); 1435 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1369 for (p = str; p < pend; p++) 1436 for (p = str; p < pend; p++)
1370 EMIT_ONE_BYTE (*p); 1437 EMIT_ONE_BYTE (*p);
1371 } 1438 }
1372 } 1439 }
1373 } 1440 }
1380 ASSURE_DESTINATION (safe_room); 1447 ASSURE_DESTINATION (safe_room);
1381 c = *charbuf++; 1448 c = *charbuf++;
1382 if (CHAR_BYTE8_P (c)) 1449 if (CHAR_BYTE8_P (c))
1383 *dst++ = CHAR_TO_BYTE8 (c); 1450 *dst++ = CHAR_TO_BYTE8 (c);
1384 else 1451 else
1385 dst += CHAR_STRING (c, dst); 1452 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1386 produced_chars++; 1453 produced_chars++;
1387 } 1454 }
1388 } 1455 }
1389 record_conversion_result (coding, CODING_RESULT_SUCCESS); 1456 record_conversion_result (coding, CODING_RESULT_SUCCESS);
1390 coding->produced_char += produced_chars; 1457 coding->produced_char += produced_chars;
5969 { 6036 {
5970 /* Source characters are in coding->charbuf. */ 6037 /* Source characters are in coding->charbuf. */
5971 int *buf = coding->charbuf; 6038 int *buf = coding->charbuf;
5972 int *buf_end = buf + coding->charbuf_used; 6039 int *buf_end = buf + coding->charbuf_used;
5973 6040
5974 if (BUFFERP (coding->src_object) 6041 if (EQ (coding->src_object, coding->dst_object))
5975 && EQ (coding->src_object, coding->dst_object)) 6042 {
5976 dst_end = ((unsigned char *) coding->source) + coding->consumed; 6043 coding_set_source (coding);
6044 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6045 }
5977 6046
5978 while (buf < buf_end) 6047 while (buf < buf_end)
5979 { 6048 {
5980 int c = *buf, i; 6049 int c = *buf, i;
5981 6050
5998 { 6067 {
5999 dst = alloc_destination (coding, 6068 dst = alloc_destination (coding,
6000 buf_end - buf 6069 buf_end - buf
6001 + MAX_MULTIBYTE_LENGTH * to_nchars, 6070 + MAX_MULTIBYTE_LENGTH * to_nchars,
6002 dst); 6071 dst);
6003 dst_end = coding->destination + coding->dst_bytes; 6072 if (EQ (coding->src_object, coding->dst_object))
6073 {
6074 coding_set_source (coding);
6075 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6076 }
6077 else
6078 dst_end = coding->destination + coding->dst_bytes;
6004 } 6079 }
6005 6080
6006 for (i = 0; i < to_nchars; i++) 6081 for (i = 0; i < to_nchars; i++)
6007 { 6082 {
6008 if (i > 0) 6083 if (i > 0)
6009 c = XINT (AREF (trans, i)); 6084 c = XINT (AREF (trans, i));
6010 if (coding->dst_multibyte 6085 if (coding->dst_multibyte
6011 || ! CHAR_BYTE8_P (c)) 6086 || ! CHAR_BYTE8_P (c))
6012 CHAR_STRING_ADVANCE (c, dst); 6087 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6013 else 6088 else
6014 *dst++ = CHAR_TO_BYTE8 (c); 6089 *dst++ = CHAR_TO_BYTE8 (c);
6015 } 6090 }
6016 produced_chars += to_nchars; 6091 produced_chars += to_nchars;
6017 *buf++ = to_nchars; 6092 *buf++ = to_nchars;
6028 { 6103 {
6029 /* Source characters are at coding->source. */ 6104 /* Source characters are at coding->source. */
6030 const unsigned char *src = coding->source; 6105 const unsigned char *src = coding->source;
6031 const unsigned char *src_end = src + coding->consumed; 6106 const unsigned char *src_end = src + coding->consumed;
6032 6107
6108 if (EQ (coding->dst_object, coding->src_object))
6109 dst_end = (unsigned char *) src;
6033 if (coding->src_multibyte != coding->dst_multibyte) 6110 if (coding->src_multibyte != coding->dst_multibyte)
6034 { 6111 {
6035 if (coding->src_multibyte) 6112 if (coding->src_multibyte)
6036 { 6113 {
6037 int multibytep = 1; 6114 int multibytep = 1;
6055 dst); 6132 dst);
6056 dst_end = coding->destination + coding->dst_bytes; 6133 dst_end = coding->destination + coding->dst_bytes;
6057 coding_set_source (coding); 6134 coding_set_source (coding);
6058 src = coding->source + offset; 6135 src = coding->source + offset;
6059 src_end = coding->source + coding->src_bytes; 6136 src_end = coding->source + coding->src_bytes;
6137 if (EQ (coding->src_object, coding->dst_object))
6138 dst_end = (unsigned char *) src;
6060 } 6139 }
6061 } 6140 }
6062 *dst++ = c; 6141 *dst++ = c;
6063 produced_chars++; 6142 produced_chars++;
6064 } 6143 }
6076 if (EQ (coding->src_object, coding->dst_object)) 6155 if (EQ (coding->src_object, coding->dst_object))
6077 dst_end = (unsigned char *) src; 6156 dst_end = (unsigned char *) src;
6078 if (dst >= dst_end - 1) 6157 if (dst >= dst_end - 1)
6079 { 6158 {
6080 EMACS_INT offset = src - coding->source; 6159 EMACS_INT offset = src - coding->source;
6081 6160 EMACS_INT more_bytes;
6082 dst = alloc_destination (coding, src_end - src + 2, 6161
6083 dst); 6162 if (EQ (coding->src_object, coding->dst_object))
6163 more_bytes = ((src_end - src) / 2) + 2;
6164 else
6165 more_bytes = src_end - src + 2;
6166 dst = alloc_destination (coding, more_bytes, dst);
6084 dst_end = coding->destination + coding->dst_bytes; 6167 dst_end = coding->destination + coding->dst_bytes;
6085 coding_set_source (coding); 6168 coding_set_source (coding);
6086 src = coding->source + offset; 6169 src = coding->source + offset;
6087 src_end = coding->source + coding->src_bytes; 6170 src_end = coding->source + coding->src_bytes;
6171 if (EQ (coding->src_object, coding->dst_object))
6172 dst_end = (unsigned char *) src;
6088 } 6173 }
6089 } 6174 }
6090 EMIT_ONE_BYTE (c); 6175 EMIT_ONE_BYTE (c);
6091 } 6176 }
6092 } 6177 }
6570 EMACS_INT bytes; 6655 EMACS_INT bytes;
6571 6656
6572 if (coding->encoder == encode_coding_raw_text) 6657 if (coding->encoder == encode_coding_raw_text)
6573 c = *src++, pos++; 6658 c = *src++, pos++;
6574 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) 6659 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6575 c = STRING_CHAR_ADVANCE (src), pos += bytes; 6660 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6576 else 6661 else
6577 c = BYTE8_TO_CHAR (*src), src++, pos++; 6662 c = BYTE8_TO_CHAR (*src), src++, pos++;
6578 } 6663 }
6579 else 6664 else
6580 c = STRING_CHAR_ADVANCE (src), pos++; 6665 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6581 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) 6666 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6582 c = '\n'; 6667 c = '\n';
6583 if (! EQ (eol_type, Qunix)) 6668 if (! EQ (eol_type, Qunix))
6584 { 6669 {
6585 if (c == '\n') 6670 if (c == '\n')