Mercurial > emacs
comparison src/coding.c @ 92399:fbc07b3a6baf
(decode_coding_utf_8): When the eol-type of CODING is
`dos', don't decode '\r' if it is the last byte in the source.
(decode_coding_utf_16, decode_coding_emacs_mule)
(decode_coding_iso_2022, decode_coding_sjis, decode_coding_big5)
(decode_coding_raw_text, decode_coding_charset): Likewise.
(produce_chars): Don't decode EOL here. Use EMACS_INT.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Sun, 02 Mar 2008 08:03:42 +0000 |
| parents | 52c3ba60b54b |
| children | c36bda0a03cc |
comparison
equal
deleted
inserted
replaced
| 92398:c98e70371bdc | 92399:fbc07b3a6baf |
|---|---|
| 953 src_end += offset; \ | 953 src_end += offset; \ |
| 954 } \ | 954 } \ |
| 955 } while (0) | 955 } while (0) |
| 956 | 956 |
| 957 | 957 |
| 958 /* If there is not at least BYTES bytes of room at dst, allocate more memory | |
| 959 for coding->destination and update dst and dst_end. We don't have | |
| 960 to take care of coding->source which will be relocated. It is | |
| 961 handled by calling coding_set_source in encode_coding. */ | |
| 962 | |
| 958 #define ASSURE_DESTINATION(bytes) \ | 963 #define ASSURE_DESTINATION(bytes) \ |
| 959 do { \ | 964 do { \ |
| 960 if (dst + (bytes) >= dst_end) \ | 965 if (dst + (bytes) >= dst_end) \ |
| 961 { \ | 966 { \ |
| 962 int more_bytes = charbuf_end - charbuf + (bytes); \ | 967 int more_bytes = charbuf_end - charbuf + (bytes); \ |
| 1223 int *charbuf = coding->charbuf + coding->charbuf_used; | 1228 int *charbuf = coding->charbuf + coding->charbuf_used; |
| 1224 int *charbuf_end = coding->charbuf + coding->charbuf_size; | 1229 int *charbuf_end = coding->charbuf + coding->charbuf_size; |
| 1225 int consumed_chars = 0, consumed_chars_base; | 1230 int consumed_chars = 0, consumed_chars_base; |
| 1226 int multibytep = coding->src_multibyte; | 1231 int multibytep = coding->src_multibyte; |
| 1227 Lisp_Object attr, charset_list; | 1232 Lisp_Object attr, charset_list; |
| 1233 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 1234 int byte_after_cr = -1; | |
| 1228 | 1235 |
| 1229 CODING_GET_INFO (coding, attr, charset_list); | 1236 CODING_GET_INFO (coding, attr, charset_list); |
| 1230 | 1237 |
| 1231 while (1) | 1238 while (1) |
| 1232 { | 1239 { |
| 1236 consumed_chars_base = consumed_chars; | 1243 consumed_chars_base = consumed_chars; |
| 1237 | 1244 |
| 1238 if (charbuf >= charbuf_end) | 1245 if (charbuf >= charbuf_end) |
| 1239 break; | 1246 break; |
| 1240 | 1247 |
| 1241 ONE_MORE_BYTE (c1); | 1248 if (byte_after_cr >= 0) |
| 1249 c1 = byte_after_cr, byte_after_cr = -1; | |
| 1250 else | |
| 1251 ONE_MORE_BYTE (c1); | |
| 1242 if (c1 < 0) | 1252 if (c1 < 0) |
| 1243 { | 1253 { |
| 1244 c = - c1; | 1254 c = - c1; |
| 1245 } | 1255 } |
| 1246 else if (UTF_8_1_OCTET_P(c1)) | 1256 else if (UTF_8_1_OCTET_P(c1)) |
| 1247 { | 1257 { |
| 1258 if (eol_crlf && c1 == '\r') | |
| 1259 ONE_MORE_BYTE (byte_after_cr); | |
| 1248 c = c1; | 1260 c = c1; |
| 1249 } | 1261 } |
| 1250 else | 1262 else |
| 1251 { | 1263 { |
| 1252 ONE_MORE_BYTE (c2); | 1264 ONE_MORE_BYTE (c2); |
| 1456 int multibytep = coding->src_multibyte; | 1468 int multibytep = coding->src_multibyte; |
| 1457 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); | 1469 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); |
| 1458 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); | 1470 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); |
| 1459 int surrogate = CODING_UTF_16_SURROGATE (coding); | 1471 int surrogate = CODING_UTF_16_SURROGATE (coding); |
| 1460 Lisp_Object attr, charset_list; | 1472 Lisp_Object attr, charset_list; |
| 1473 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 1474 int byte_after_cr1 = -1, byte_after_cr2 = -1; | |
| 1461 | 1475 |
| 1462 CODING_GET_INFO (coding, attr, charset_list); | 1476 CODING_GET_INFO (coding, attr, charset_list); |
| 1463 | 1477 |
| 1464 if (bom == utf_16_with_bom) | 1478 if (bom == utf_16_with_bom) |
| 1465 { | 1479 { |
| 1495 consumed_chars_base = consumed_chars; | 1509 consumed_chars_base = consumed_chars; |
| 1496 | 1510 |
| 1497 if (charbuf + 2 >= charbuf_end) | 1511 if (charbuf + 2 >= charbuf_end) |
| 1498 break; | 1512 break; |
| 1499 | 1513 |
| 1500 ONE_MORE_BYTE (c1); | 1514 if (byte_after_cr1 >= 0) |
| 1515 c1 = byte_after_cr1, byte_after_cr1 = -1; | |
| 1516 else | |
| 1517 ONE_MORE_BYTE (c1); | |
| 1501 if (c1 < 0) | 1518 if (c1 < 0) |
| 1502 { | 1519 { |
| 1503 *charbuf++ = -c1; | 1520 *charbuf++ = -c1; |
| 1504 continue; | 1521 continue; |
| 1505 } | 1522 } |
| 1506 ONE_MORE_BYTE (c2); | 1523 if (byte_after_cr2 >= 0) |
| 1524 c2 = byte_after_cr2, byte_after_cr2 = -1; | |
| 1525 else | |
| 1526 ONE_MORE_BYTE (c2); | |
| 1507 if (c2 < 0) | 1527 if (c2 < 0) |
| 1508 { | 1528 { |
| 1509 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); | 1529 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); |
| 1510 *charbuf++ = -c2; | 1530 *charbuf++ = -c2; |
| 1511 continue; | 1531 continue; |
| 1512 } | 1532 } |
| 1513 c = (endian == utf_16_big_endian | 1533 c = (endian == utf_16_big_endian |
| 1514 ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); | 1534 ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); |
| 1535 | |
| 1515 if (surrogate) | 1536 if (surrogate) |
| 1516 { | 1537 { |
| 1517 if (! UTF_16_LOW_SURROGATE_P (c)) | 1538 if (! UTF_16_LOW_SURROGATE_P (c)) |
| 1518 { | 1539 { |
| 1519 if (endian == utf_16_big_endian) | 1540 if (endian == utf_16_big_endian) |
| 1538 else | 1559 else |
| 1539 { | 1560 { |
| 1540 if (UTF_16_HIGH_SURROGATE_P (c)) | 1561 if (UTF_16_HIGH_SURROGATE_P (c)) |
| 1541 CODING_UTF_16_SURROGATE (coding) = surrogate = c; | 1562 CODING_UTF_16_SURROGATE (coding) = surrogate = c; |
| 1542 else | 1563 else |
| 1543 *charbuf++ = c; | 1564 { |
| 1565 if (eol_crlf && c == '\r') | |
| 1566 { | |
| 1567 ONE_MORE_BYTE (byte_after_cr1); | |
| 1568 ONE_MORE_BYTE (byte_after_cr2); | |
| 1569 } | |
| 1570 *charbuf++ = c; | |
| 1571 } | |
| 1544 } | 1572 } |
| 1545 } | 1573 } |
| 1546 | 1574 |
| 1547 no_more_source: | 1575 no_more_source: |
| 1548 coding->consumed_char += consumed_chars_base; | 1576 coding->consumed_char += consumed_chars_base; |
| 2070 int multibytep = coding->src_multibyte; | 2098 int multibytep = coding->src_multibyte; |
| 2071 Lisp_Object attrs, charset_list; | 2099 Lisp_Object attrs, charset_list; |
| 2072 int char_offset = coding->produced_char; | 2100 int char_offset = coding->produced_char; |
| 2073 int last_offset = char_offset; | 2101 int last_offset = char_offset; |
| 2074 int last_id = charset_ascii; | 2102 int last_id = charset_ascii; |
| 2103 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 2104 int byte_after_cr = -1; | |
| 2075 | 2105 |
| 2076 CODING_GET_INFO (coding, attrs, charset_list); | 2106 CODING_GET_INFO (coding, attrs, charset_list); |
| 2077 | 2107 |
| 2078 while (1) | 2108 while (1) |
| 2079 { | 2109 { |
| 2083 consumed_chars_base = consumed_chars; | 2113 consumed_chars_base = consumed_chars; |
| 2084 | 2114 |
| 2085 if (charbuf >= charbuf_end) | 2115 if (charbuf >= charbuf_end) |
| 2086 break; | 2116 break; |
| 2087 | 2117 |
| 2088 ONE_MORE_BYTE (c); | 2118 if (byte_after_cr >= 0) |
| 2119 c = byte_after_cr, byte_after_cr = -1; | |
| 2120 else | |
| 2121 ONE_MORE_BYTE (c); | |
| 2089 if (c < 0) | 2122 if (c < 0) |
| 2090 { | 2123 { |
| 2091 *charbuf++ = -c; | 2124 *charbuf++ = -c; |
| 2092 char_offset++; | 2125 char_offset++; |
| 2093 } | 2126 } |
| 2094 else if (c < 0x80) | 2127 else if (c < 0x80) |
| 2095 { | 2128 { |
| 2129 if (eol_crlf && c == '\r') | |
| 2130 ONE_MORE_BYTE (byte_after_cr); | |
| 2096 *charbuf++ = c; | 2131 *charbuf++ = c; |
| 2097 char_offset++; | 2132 char_offset++; |
| 2098 } | 2133 } |
| 2099 else if (c == 0x80) | 2134 else if (c == 0x80) |
| 2100 { | 2135 { |
| 2943 int component_len; | 2978 int component_len; |
| 2944 Lisp_Object attrs, charset_list; | 2979 Lisp_Object attrs, charset_list; |
| 2945 int char_offset = coding->produced_char; | 2980 int char_offset = coding->produced_char; |
| 2946 int last_offset = char_offset; | 2981 int last_offset = char_offset; |
| 2947 int last_id = charset_ascii; | 2982 int last_id = charset_ascii; |
| 2983 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 2984 int byte_after_cr = -1; | |
| 2948 | 2985 |
| 2949 CODING_GET_INFO (coding, attrs, charset_list); | 2986 CODING_GET_INFO (coding, attrs, charset_list); |
| 2950 setup_iso_safe_charsets (attrs); | 2987 setup_iso_safe_charsets (attrs); |
| 2951 /* Charset list may have been changed. */ | 2988 /* Charset list may have been changed. */ |
| 2952 charset_list = CODING_ATTR_CHARSET_LIST (attrs); | 2989 charset_list = CODING_ATTR_CHARSET_LIST (attrs); |
| 2960 consumed_chars_base = consumed_chars; | 2997 consumed_chars_base = consumed_chars; |
| 2961 | 2998 |
| 2962 if (charbuf >= charbuf_end) | 2999 if (charbuf >= charbuf_end) |
| 2963 break; | 3000 break; |
| 2964 | 3001 |
| 2965 ONE_MORE_BYTE (c1); | 3002 if (byte_after_cr >= 0) |
| 3003 c1 = byte_after_cr, byte_after_cr = -1; | |
| 3004 else | |
| 3005 ONE_MORE_BYTE (c1); | |
| 2966 if (c1 < 0) | 3006 if (c1 < 0) |
| 2967 goto invalid_code; | 3007 goto invalid_code; |
| 2968 | 3008 |
| 2969 /* We produce at most one character. */ | 3009 /* We produce at most one character. */ |
| 2970 switch (iso_code_class [c1]) | 3010 switch (iso_code_class [c1]) |
| 3019 goto invalid_code; | 3059 goto invalid_code; |
| 3020 charset = CHARSET_FROM_ID (charset_id_1); | 3060 charset = CHARSET_FROM_ID (charset_id_1); |
| 3021 break; | 3061 break; |
| 3022 | 3062 |
| 3023 case ISO_control_0: | 3063 case ISO_control_0: |
| 3064 if (eol_crlf && c1 == '\r') | |
| 3065 ONE_MORE_BYTE (byte_after_cr); | |
| 3024 MAYBE_FINISH_COMPOSITION (); | 3066 MAYBE_FINISH_COMPOSITION (); |
| 3025 charset = CHARSET_FROM_ID (charset_ascii); | 3067 charset = CHARSET_FROM_ID (charset_ascii); |
| 3026 break; | 3068 break; |
| 3027 | 3069 |
| 3028 case ISO_control_1: | 3070 case ISO_control_1: |
| 4089 struct charset *charset_kanji2; | 4131 struct charset *charset_kanji2; |
| 4090 Lisp_Object attrs, charset_list, val; | 4132 Lisp_Object attrs, charset_list, val; |
| 4091 int char_offset = coding->produced_char; | 4133 int char_offset = coding->produced_char; |
| 4092 int last_offset = char_offset; | 4134 int last_offset = char_offset; |
| 4093 int last_id = charset_ascii; | 4135 int last_id = charset_ascii; |
| 4136 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 4137 int byte_after_cr = -1; | |
| 4094 | 4138 |
| 4095 CODING_GET_INFO (coding, attrs, charset_list); | 4139 CODING_GET_INFO (coding, attrs, charset_list); |
| 4096 | 4140 |
| 4097 val = charset_list; | 4141 val = charset_list; |
| 4098 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4142 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4109 consumed_chars_base = consumed_chars; | 4153 consumed_chars_base = consumed_chars; |
| 4110 | 4154 |
| 4111 if (charbuf >= charbuf_end) | 4155 if (charbuf >= charbuf_end) |
| 4112 break; | 4156 break; |
| 4113 | 4157 |
| 4114 ONE_MORE_BYTE (c); | 4158 if (byte_after_cr >= 0) |
| 4159 c = byte_after_cr, byte_after_cr = -1; | |
| 4160 else | |
| 4161 ONE_MORE_BYTE (c); | |
| 4115 if (c < 0) | 4162 if (c < 0) |
| 4116 goto invalid_code; | 4163 goto invalid_code; |
| 4117 if (c < 0x80) | 4164 if (c < 0x80) |
| 4118 charset = charset_roman; | 4165 { |
| 4166 if (eol_crlf && c == '\r') | |
| 4167 ONE_MORE_BYTE (byte_after_cr); | |
| 4168 charset = charset_roman; | |
| 4169 } | |
| 4119 else if (c == 0x80 || c == 0xA0) | 4170 else if (c == 0x80 || c == 0xA0) |
| 4120 goto invalid_code; | 4171 goto invalid_code; |
| 4121 else if (c >= 0xA1 && c <= 0xDF) | 4172 else if (c >= 0xA1 && c <= 0xDF) |
| 4122 { | 4173 { |
| 4123 /* SJIS -> JISX0201-Kana */ | 4174 /* SJIS -> JISX0201-Kana */ |
| 4191 struct charset *charset_roman, *charset_big5; | 4242 struct charset *charset_roman, *charset_big5; |
| 4192 Lisp_Object attrs, charset_list, val; | 4243 Lisp_Object attrs, charset_list, val; |
| 4193 int char_offset = coding->produced_char; | 4244 int char_offset = coding->produced_char; |
| 4194 int last_offset = char_offset; | 4245 int last_offset = char_offset; |
| 4195 int last_id = charset_ascii; | 4246 int last_id = charset_ascii; |
| 4247 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 4248 int byte_after_cr = -1; | |
| 4196 | 4249 |
| 4197 CODING_GET_INFO (coding, attrs, charset_list); | 4250 CODING_GET_INFO (coding, attrs, charset_list); |
| 4198 val = charset_list; | 4251 val = charset_list; |
| 4199 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4252 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4200 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 4253 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
| 4208 consumed_chars_base = consumed_chars; | 4261 consumed_chars_base = consumed_chars; |
| 4209 | 4262 |
| 4210 if (charbuf >= charbuf_end) | 4263 if (charbuf >= charbuf_end) |
| 4211 break; | 4264 break; |
| 4212 | 4265 |
| 4213 ONE_MORE_BYTE (c); | 4266 if (byte_after_cr >= 0) |
| 4267 c1 = byte_after_cr, byte_after_cr = -1; | |
| 4268 else | |
| 4269 ONE_MORE_BYTE (c); | |
| 4214 | 4270 |
| 4215 if (c < 0) | 4271 if (c < 0) |
| 4216 goto invalid_code; | 4272 goto invalid_code; |
| 4217 if (c < 0x80) | 4273 if (c < 0x80) |
| 4218 charset = charset_roman; | 4274 { |
| 4275 if (eol_crlf && c1 == '\r') | |
| 4276 ONE_MORE_BYTE (byte_after_cr); | |
| 4277 charset = charset_roman; | |
| 4278 } | |
| 4219 else | 4279 else |
| 4220 { | 4280 { |
| 4221 /* BIG5 -> Big5 */ | 4281 /* BIG5 -> Big5 */ |
| 4222 if (c < 0xA1 || c > 0xFE) | 4282 if (c < 0xA1 || c > 0xFE) |
| 4223 goto invalid_code; | 4283 goto invalid_code; |
| 4630 | 4690 |
| 4631 static void | 4691 static void |
| 4632 decode_coding_raw_text (coding) | 4692 decode_coding_raw_text (coding) |
| 4633 struct coding_system *coding; | 4693 struct coding_system *coding; |
| 4634 { | 4694 { |
| 4695 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 4696 | |
| 4635 coding->chars_at_source = 1; | 4697 coding->chars_at_source = 1; |
| 4636 coding->consumed_char = 0; | 4698 coding->consumed_char = coding->src_chars; |
| 4637 coding->consumed = 0; | 4699 coding->consumed = coding->src_bytes; |
| 4638 record_conversion_result (coding, CODING_RESULT_SUCCESS); | 4700 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r') |
| 4701 { | |
| 4702 coding->consumed_char--; | |
| 4703 coding->consumed--; | |
| 4704 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); | |
| 4705 } | |
| 4706 else | |
| 4707 record_conversion_result (coding, CODING_RESULT_SUCCESS); | |
| 4639 } | 4708 } |
| 4640 | 4709 |
| 4641 static int | 4710 static int |
| 4642 encode_coding_raw_text (coding) | 4711 encode_coding_raw_text (coding) |
| 4643 struct coding_system *coding; | 4712 struct coding_system *coding; |
| 4827 int multibytep = coding->src_multibyte; | 4896 int multibytep = coding->src_multibyte; |
| 4828 Lisp_Object attrs, charset_list, valids; | 4897 Lisp_Object attrs, charset_list, valids; |
| 4829 int char_offset = coding->produced_char; | 4898 int char_offset = coding->produced_char; |
| 4830 int last_offset = char_offset; | 4899 int last_offset = char_offset; |
| 4831 int last_id = charset_ascii; | 4900 int last_id = charset_ascii; |
| 4901 int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | |
| 4902 int byte_after_cr = -1; | |
| 4832 | 4903 |
| 4833 CODING_GET_INFO (coding, attrs, charset_list); | 4904 CODING_GET_INFO (coding, attrs, charset_list); |
| 4834 valids = AREF (attrs, coding_attr_charset_valids); | 4905 valids = AREF (attrs, coding_attr_charset_valids); |
| 4835 | 4906 |
| 4836 while (1) | 4907 while (1) |
| 4846 consumed_chars_base = consumed_chars; | 4917 consumed_chars_base = consumed_chars; |
| 4847 | 4918 |
| 4848 if (charbuf >= charbuf_end) | 4919 if (charbuf >= charbuf_end) |
| 4849 break; | 4920 break; |
| 4850 | 4921 |
| 4851 ONE_MORE_BYTE (c); | 4922 if (byte_after_cr >= 0) |
| 4923 { | |
| 4924 c = byte_after_cr; | |
| 4925 byte_after_cr = -1; | |
| 4926 } | |
| 4927 else | |
| 4928 { | |
| 4929 ONE_MORE_BYTE (c); | |
| 4930 if (eol_crlf && c == '\r') | |
| 4931 ONE_MORE_BYTE (byte_after_cr); | |
| 4932 } | |
| 4852 if (c < 0) | 4933 if (c < 0) |
| 4853 goto invalid_code; | 4934 goto invalid_code; |
| 4854 code = c; | 4935 code = c; |
| 4855 | 4936 |
| 4856 val = AREF (valids, c); | 4937 val = AREF (valids, c); |
| 5878 Lisp_Object translation_table; | 5959 Lisp_Object translation_table; |
| 5879 int last_block; | 5960 int last_block; |
| 5880 { | 5961 { |
| 5881 unsigned char *dst = coding->destination + coding->produced; | 5962 unsigned char *dst = coding->destination + coding->produced; |
| 5882 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 5963 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 5883 int produced; | 5964 EMACS_INT produced; |
| 5884 int produced_chars = 0; | 5965 EMACS_INT produced_chars = 0; |
| 5885 int carryover = 0; | 5966 int carryover = 0; |
| 5886 | 5967 |
| 5887 if (! coding->chars_at_source) | 5968 if (! coding->chars_at_source) |
| 5888 { | 5969 { |
| 5889 /* Characters are in coding->charbuf. */ | 5970 /* Source characters are in coding->charbuf. */ |
| 5890 int *buf = coding->charbuf; | 5971 int *buf = coding->charbuf; |
| 5891 int *buf_end = buf + coding->charbuf_used; | 5972 int *buf_end = buf + coding->charbuf_used; |
| 5892 | 5973 |
| 5893 if (BUFFERP (coding->src_object) | 5974 if (BUFFERP (coding->src_object) |
| 5894 && EQ (coding->src_object, coding->dst_object)) | 5975 && EQ (coding->src_object, coding->dst_object)) |
| 5943 } | 6024 } |
| 5944 carryover = buf_end - buf; | 6025 carryover = buf_end - buf; |
| 5945 } | 6026 } |
| 5946 else | 6027 else |
| 5947 { | 6028 { |
| 6029 /* Source characters are at coding->source. */ | |
| 5948 const unsigned char *src = coding->source; | 6030 const unsigned char *src = coding->source; |
| 5949 const unsigned char *src_end = src + coding->src_bytes; | 6031 const unsigned char *src_end = src + coding->consumed; |
| 5950 Lisp_Object eol_type; | |
| 5951 | |
| 5952 eol_type = CODING_ID_EOL_TYPE (coding->id); | |
| 5953 | 6032 |
| 5954 if (coding->src_multibyte != coding->dst_multibyte) | 6033 if (coding->src_multibyte != coding->dst_multibyte) |
| 5955 { | 6034 { |
| 5956 if (coding->src_multibyte) | 6035 if (coding->src_multibyte) |
| 5957 { | 6036 { |
| 5958 int multibytep = 1; | 6037 int multibytep = 1; |
| 5959 int consumed_chars; | 6038 EMACS_INT consumed_chars; |
| 5960 | 6039 |
| 5961 while (1) | 6040 while (1) |
| 5962 { | 6041 { |
| 5963 const unsigned char *src_base = src; | 6042 const unsigned char *src_base = src; |
| 5964 int c; | 6043 int c; |
| 5965 | 6044 |
| 5966 ONE_MORE_BYTE (c); | 6045 ONE_MORE_BYTE (c); |
| 5967 if (c == '\r') | |
| 5968 { | |
| 5969 if (EQ (eol_type, Qdos)) | |
| 5970 { | |
| 5971 if (src == src_end) | |
| 5972 { | |
| 5973 record_conversion_result | |
| 5974 (coding, CODING_RESULT_INSUFFICIENT_SRC); | |
| 5975 goto no_more_source; | |
| 5976 } | |
| 5977 if (*src == '\n') | |
| 5978 c = *src++; | |
| 5979 } | |
| 5980 else if (EQ (eol_type, Qmac)) | |
| 5981 c = '\n'; | |
| 5982 } | |
| 5983 if (dst == dst_end) | 6046 if (dst == dst_end) |
| 5984 { | 6047 { |
| 5985 coding->consumed = src - coding->source; | 6048 if (EQ (coding->src_object, coding->dst_object)) |
| 5986 | 6049 dst_end = (unsigned char *) src; |
| 5987 if (EQ (coding->src_object, coding->dst_object)) | 6050 if (dst == dst_end) |
| 5988 dst_end = (unsigned char *) src; | 6051 { |
| 5989 if (dst == dst_end) | 6052 EMACS_INT offset = src - coding->source; |
| 5990 { | 6053 |
| 5991 dst = alloc_destination (coding, src_end - src + 1, | 6054 dst = alloc_destination (coding, src_end - src + 1, |
| 5992 dst); | 6055 dst); |
| 5993 dst_end = coding->destination + coding->dst_bytes; | 6056 dst_end = coding->destination + coding->dst_bytes; |
| 5994 coding_set_source (coding); | 6057 coding_set_source (coding); |
| 5995 src = coding->source + coding->consumed; | 6058 src = coding->source + offset; |
| 5996 src_end = coding->source + coding->src_bytes; | 6059 src_end = coding->source + coding->src_bytes; |
| 5997 } | 6060 } |
| 5998 } | 6061 } |
| 5999 *dst++ = c; | 6062 *dst++ = c; |
| 6000 produced_chars++; | 6063 produced_chars++; |
| 6001 } | 6064 } |
| 6002 no_more_source: | 6065 no_more_source: |
| 6006 while (src < src_end) | 6069 while (src < src_end) |
| 6007 { | 6070 { |
| 6008 int multibytep = 1; | 6071 int multibytep = 1; |
| 6009 int c = *src++; | 6072 int c = *src++; |
| 6010 | 6073 |
| 6011 if (c == '\r') | |
| 6012 { | |
| 6013 if (EQ (eol_type, Qdos)) | |
| 6014 { | |
| 6015 if (src < src_end | |
| 6016 && *src == '\n') | |
| 6017 c = *src++; | |
| 6018 } | |
| 6019 else if (EQ (eol_type, Qmac)) | |
| 6020 c = '\n'; | |
| 6021 } | |
| 6022 if (dst >= dst_end - 1) | 6074 if (dst >= dst_end - 1) |
| 6023 { | 6075 { |
| 6024 coding->consumed = src - coding->source; | |
| 6025 | |
| 6026 if (EQ (coding->src_object, coding->dst_object)) | 6076 if (EQ (coding->src_object, coding->dst_object)) |
| 6027 dst_end = (unsigned char *) src; | 6077 dst_end = (unsigned char *) src; |
| 6028 if (dst >= dst_end - 1) | 6078 if (dst >= dst_end - 1) |
| 6029 { | 6079 { |
| 6080 EMACS_INT offset = src - coding->source; | |
| 6081 | |
| 6030 dst = alloc_destination (coding, src_end - src + 2, | 6082 dst = alloc_destination (coding, src_end - src + 2, |
| 6031 dst); | 6083 dst); |
| 6032 dst_end = coding->destination + coding->dst_bytes; | 6084 dst_end = coding->destination + coding->dst_bytes; |
| 6033 coding_set_source (coding); | 6085 coding_set_source (coding); |
| 6034 src = coding->source + coding->consumed; | 6086 src = coding->source + offset; |
| 6035 src_end = coding->source + coding->src_bytes; | 6087 src_end = coding->source + coding->src_bytes; |
| 6036 } | 6088 } |
| 6037 } | 6089 } |
| 6038 EMIT_ONE_BYTE (c); | 6090 EMIT_ONE_BYTE (c); |
| 6039 } | 6091 } |
| 6040 } | 6092 } |
| 6041 else | 6093 else |
| 6042 { | 6094 { |
| 6043 if (!EQ (coding->src_object, coding->dst_object)) | 6095 if (!EQ (coding->src_object, coding->dst_object)) |
| 6044 { | 6096 { |
| 6045 int require = coding->src_bytes - coding->dst_bytes; | 6097 EMACS_INT require = coding->src_bytes - coding->dst_bytes; |
| 6046 | 6098 |
| 6047 if (require > 0) | 6099 if (require > 0) |
| 6048 { | 6100 { |
| 6049 EMACS_INT offset = src - coding->source; | 6101 EMACS_INT offset = src - coding->source; |
| 6050 | 6102 |
| 6052 coding_set_source (coding); | 6104 coding_set_source (coding); |
| 6053 src = coding->source + offset; | 6105 src = coding->source + offset; |
| 6054 src_end = coding->source + coding->src_bytes; | 6106 src_end = coding->source + coding->src_bytes; |
| 6055 } | 6107 } |
| 6056 } | 6108 } |
| 6057 produced_chars = coding->src_chars; | 6109 produced_chars = coding->consumed_char; |
| 6058 while (src < src_end) | 6110 while (src < src_end) |
| 6059 { | 6111 *dst += *src++; |
| 6060 int c = *src++; | 6112 } |
| 6061 | |
| 6062 if (c == '\r') | |
| 6063 { | |
| 6064 if (EQ (eol_type, Qdos)) | |
| 6065 { | |
| 6066 if (src < src_end | |
| 6067 && *src == '\n') | |
| 6068 c = *src++; | |
| 6069 produced_chars--; | |
| 6070 } | |
| 6071 else if (EQ (eol_type, Qmac)) | |
| 6072 c = '\n'; | |
| 6073 } | |
| 6074 *dst++ = c; | |
| 6075 } | |
| 6076 } | |
| 6077 coding->consumed = coding->src_bytes; | |
| 6078 coding->consumed_char = coding->src_chars; | |
| 6079 } | 6113 } |
| 6080 | 6114 |
| 6081 produced = dst - (coding->destination + coding->produced); | 6115 produced = dst - (coding->destination + coding->produced); |
| 6082 if (BUFFERP (coding->dst_object) && produced_chars > 0) | 6116 if (BUFFERP (coding->dst_object) && produced_chars > 0) |
| 6083 insert_from_gap (produced_chars, produced); | 6117 insert_from_gap (produced_chars, produced); |
