Mercurial > emacs
comparison src/coding.c @ 20931:068eb408c911
(decode_coding_iso2022): Update coding->fake_multibyte.
(ENCODE_SINGLE_SHIFT_2, ENCODE_SINGLE_SHIFT_3,
encode_coding_iso2022, decode_coding_sjis_big5,
encode_coding_sjis_big5, decode_eol, encode_eol, decode_coding,
encode_coding): Likewise.
(shrink_decoding_region, shrink_encoding_region): Do not skip
non-ASCII code in any case. Bug fix for getting the starting address
from BEG.
(code_convert_region): Sync character positions correctly by
paying attention to coding->fake_multibyte.
(code_convert_string): Set the number of characters and bytes just
processed in members of CODING.
(code_convert_string): Adjusted for the change of
code_convert_region.
(code_convert_region1): Likewise.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Fri, 20 Feb 1998 01:40:47 +0000 |
| parents | 0fa2183c587d |
| children | e4dd62e5d921 |
comparison
equal
deleted
inserted
replaced
| 20930:1331679fe704 | 20931:068eb408c911 |
|---|---|
| 1005 | 1005 |
| 1006 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 1006 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
| 1007 unification_table = Vstandard_character_unification_table_for_decode; | 1007 unification_table = Vstandard_character_unification_table_for_decode; |
| 1008 | 1008 |
| 1009 coding->produced_char = 0; | 1009 coding->produced_char = 0; |
| 1010 coding->fake_multibyte = 0; | |
| 1010 while (src < src_end && (dst_bytes | 1011 while (src < src_end && (dst_bytes |
| 1011 ? (dst < adjusted_dst_end) | 1012 ? (dst < adjusted_dst_end) |
| 1012 : (dst < src - 6))) | 1013 : (dst < src - 6))) |
| 1013 { | 1014 { |
| 1014 /* SRC_BASE remembers the start position in source in each loop. | 1015 /* SRC_BASE remembers the start position in source in each loop. |
| 1044 break; | 1045 break; |
| 1045 | 1046 |
| 1046 case ISO_0xA0_or_0xFF: | 1047 case ISO_0xA0_or_0xFF: |
| 1047 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 | 1048 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 |
| 1048 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1049 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) |
| 1049 { | 1050 goto label_invalid_code; |
| 1050 /* Invalid code. */ | |
| 1051 *dst++ = c1; | |
| 1052 coding->produced_char++; | |
| 1053 break; | |
| 1054 } | |
| 1055 /* This is a graphic character, we fall down ... */ | 1051 /* This is a graphic character, we fall down ... */ |
| 1056 | 1052 |
| 1057 case ISO_graphic_plane_1: | 1053 case ISO_graphic_plane_1: |
| 1058 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1054 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) |
| 1059 { | 1055 goto label_invalid_code; |
| 1060 /* Invalid code. */ | |
| 1061 *dst++ = c1; | |
| 1062 coding->produced_char++; | |
| 1063 } | |
| 1064 else | 1056 else |
| 1065 DECODE_ISO_CHARACTER (charset1, c1); | 1057 DECODE_ISO_CHARACTER (charset1, c1); |
| 1066 break; | 1058 break; |
| 1067 | 1059 |
| 1068 case ISO_control_code: | 1060 case ISO_control_code: |
| 1308 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 1300 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 1309 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); | 1301 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); |
| 1310 break; | 1302 break; |
| 1311 | 1303 |
| 1312 label_invalid_code: | 1304 label_invalid_code: |
| 1313 coding->produced_char += src - src_base; | |
| 1314 while (src_base < src) | 1305 while (src_base < src) |
| 1315 *dst++ = *src_base++; | 1306 *dst++ = *src_base++; |
| 1307 coding->fake_multibyte = 1; | |
| 1316 } | 1308 } |
| 1317 continue; | 1309 continue; |
| 1318 | 1310 |
| 1319 label_end_of_loop: | 1311 label_end_of_loop: |
| 1320 result = CODING_FINISH_INSUFFICIENT_SRC; | 1312 result = CODING_FINISH_INSUFFICIENT_SRC; |
| 1321 label_end_of_loop_2: | 1313 label_end_of_loop_2: |
| 1322 src = src_base; | 1314 src = src_base; |
| 1323 break; | 1315 break; |
| 1324 } | 1316 } |
| 1325 | 1317 |
| 1326 if (result == CODING_FINISH_NORMAL | 1318 if (src < src_end) |
| 1327 && src < src_end) | 1319 { |
| 1328 result = CODING_FINISH_INSUFFICIENT_DST; | 1320 if (result == CODING_FINISH_NORMAL) |
| 1329 | 1321 result = CODING_FINISH_INSUFFICIENT_DST; |
| 1330 /* If this is the last block of the text to be decoded, we had | 1322 else if (result != CODING_FINISH_INCONSISTENT_EOL |
| 1331 better just flush out all remaining codes in the text although | 1323 && coding->mode & CODING_MODE_LAST_BLOCK) |
| 1332 they are not valid characters. */ | 1324 { |
| 1333 if (coding->mode & CODING_MODE_LAST_BLOCK) | 1325 /* This is the last block of the text to be decoded. We had |
| 1334 { | 1326 better just flush out all remaining codes in the text |
| 1335 bcopy (src, dst, src_end - src); | 1327 although they are not valid characters. */ |
| 1336 dst += (src_end - src); | 1328 src_bytes = src_end - src; |
| 1337 src = src_end; | 1329 if (dst_bytes && (dst_end - dst < src_bytes)) |
| 1338 } | 1330 src_bytes = dst_end - dst; |
| 1331 bcopy (src, dst, src_bytes); | |
| 1332 dst += src_bytes; | |
| 1333 src += src_bytes; | |
| 1334 coding->fake_multibyte = 1; | |
| 1335 } | |
| 1336 } | |
| 1337 | |
| 1339 coding->consumed = coding->consumed_char = src - source; | 1338 coding->consumed = coding->consumed_char = src - source; |
| 1340 coding->produced = dst - destination; | 1339 coding->produced = dst - destination; |
| 1341 return result; | 1340 return result; |
| 1342 } | 1341 } |
| 1343 | 1342 |
| 1411 #define ENCODE_SINGLE_SHIFT_2 \ | 1410 #define ENCODE_SINGLE_SHIFT_2 \ |
| 1412 do { \ | 1411 do { \ |
| 1413 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1412 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
| 1414 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ | 1413 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ |
| 1415 else \ | 1414 else \ |
| 1416 *dst++ = ISO_CODE_SS2; \ | 1415 { \ |
| 1416 *dst++ = ISO_CODE_SS2; \ | |
| 1417 coding->fake_multibyte = 1; \ | |
| 1418 } \ | |
| 1417 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1419 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
| 1418 } while (0) | 1420 } while (0) |
| 1419 | 1421 |
| 1420 #define ENCODE_SINGLE_SHIFT_3 \ | 1422 #define ENCODE_SINGLE_SHIFT_3 \ |
| 1421 do { \ | 1423 do { \ |
| 1422 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1424 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
| 1423 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ | 1425 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ |
| 1424 else \ | 1426 else \ |
| 1425 *dst++ = ISO_CODE_SS3; \ | 1427 { \ |
| 1428 *dst++ = ISO_CODE_SS3; \ | |
| 1429 coding->fake_multibyte = 1; \ | |
| 1430 } \ | |
| 1426 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1431 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
| 1427 } while (0) | 1432 } while (0) |
| 1428 | 1433 |
| 1429 /* The following four macros produce codes (control character or | 1434 /* The following four macros produce codes (control character or |
| 1430 escape sequence) for ISO2022 locking-shift functions (shift-in, | 1435 escape sequence) for ISO2022 locking-shift functions (shift-in, |
| 1744 | 1749 |
| 1745 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 1750 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
| 1746 unification_table = Vstandard_character_unification_table_for_encode; | 1751 unification_table = Vstandard_character_unification_table_for_encode; |
| 1747 | 1752 |
| 1748 coding->consumed_char = 0; | 1753 coding->consumed_char = 0; |
| 1754 coding->fake_multibyte = 0; | |
| 1749 while (src < src_end && (dst_bytes | 1755 while (src < src_end && (dst_bytes |
| 1750 ? (dst < adjusted_dst_end) | 1756 ? (dst < adjusted_dst_end) |
| 1751 : (dst < src - 19))) | 1757 : (dst < src - 19))) |
| 1752 { | 1758 { |
| 1753 /* SRC_BASE remembers the start position in source in each loop. | 1759 /* SRC_BASE remembers the start position in source in each loop. |
| 1931 result = CODING_FINISH_INSUFFICIENT_SRC; | 1937 result = CODING_FINISH_INSUFFICIENT_SRC; |
| 1932 src = src_base; | 1938 src = src_base; |
| 1933 break; | 1939 break; |
| 1934 } | 1940 } |
| 1935 | 1941 |
| 1936 if (result == CODING_FINISH_NORMAL | 1942 if (src < src_end) |
| 1937 && src < src_end) | 1943 { |
| 1938 result = CODING_FINISH_INSUFFICIENT_DST; | 1944 if (result == CODING_FINISH_NORMAL) |
| 1939 | 1945 result = CODING_FINISH_INSUFFICIENT_DST; |
| 1940 /* If this is the last block of the text to be encoded, we must | 1946 else |
| 1941 reset graphic planes and registers to the initial state, and | 1947 /* If this is the last block of the text to be encoded, we |
| 1942 flush out the carryover if any. */ | 1948 must reset graphic planes and registers to the initial |
| 1943 if (coding->mode & CODING_MODE_LAST_BLOCK) | 1949 state, and flush out the carryover if any. */ |
| 1944 ENCODE_RESET_PLANE_AND_REGISTER; | 1950 if (coding->mode & CODING_MODE_LAST_BLOCK) |
| 1951 ENCODE_RESET_PLANE_AND_REGISTER; | |
| 1952 } | |
| 1945 | 1953 |
| 1946 coding->consumed = src - source; | 1954 coding->consumed = src - source; |
| 1947 coding->produced = coding->produced_char = dst - destination; | 1955 coding->produced = coding->produced_char = dst - destination; |
| 1948 return result; | 1956 return result; |
| 1949 } | 1957 } |
| 2052 else if (CHARSET_DIMENSION (charset_alt) == 1) \ | 2060 else if (CHARSET_DIMENSION (charset_alt) == 1) \ |
| 2053 { \ | 2061 { \ |
| 2054 if (sjis_p && charset_alt == charset_katakana_jisx0201) \ | 2062 if (sjis_p && charset_alt == charset_katakana_jisx0201) \ |
| 2055 *dst++ = c1; \ | 2063 *dst++ = c1; \ |
| 2056 else \ | 2064 else \ |
| 2057 *dst++ = charset_alt, *dst++ = c1; \ | 2065 { \ |
| 2066 *dst++ = charset_alt, *dst++ = c1; \ | |
| 2067 coding->fake_multibyte = 1; \ | |
| 2068 } \ | |
| 2058 } \ | 2069 } \ |
| 2059 else \ | 2070 else \ |
| 2060 { \ | 2071 { \ |
| 2061 c1 &= 0x7F, c2 &= 0x7F; \ | 2072 c1 &= 0x7F, c2 &= 0x7F; \ |
| 2062 if (sjis_p && charset_alt == charset_jisx0208) \ | 2073 if (sjis_p && charset_alt == charset_jisx0208) \ |
| 2063 { \ | 2074 { \ |
| 2064 unsigned char s1, s2; \ | 2075 unsigned char s1, s2; \ |
| 2065 \ | 2076 \ |
| 2066 ENCODE_SJIS (c1, c2, s1, s2); \ | 2077 ENCODE_SJIS (c1, c2, s1, s2); \ |
| 2067 *dst++ = s1, *dst++ = s2; \ | 2078 *dst++ = s1, *dst++ = s2; \ |
| 2079 coding->fake_multibyte = 1; \ | |
| 2068 } \ | 2080 } \ |
| 2069 else if (!sjis_p \ | 2081 else if (!sjis_p \ |
| 2070 && (charset_alt == charset_big5_1 \ | 2082 && (charset_alt == charset_big5_1 \ |
| 2071 || charset_alt == charset_big5_2)) \ | 2083 || charset_alt == charset_big5_2)) \ |
| 2072 { \ | 2084 { \ |
| 2073 unsigned char b1, b2; \ | 2085 unsigned char b1, b2; \ |
| 2074 \ | 2086 \ |
| 2075 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ | 2087 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ |
| 2076 *dst++ = b1, *dst++ = b2; \ | 2088 *dst++ = b1, *dst++ = b2; \ |
| 2077 } \ | 2089 } \ |
| 2078 else \ | 2090 else \ |
| 2079 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | 2091 { \ |
| 2092 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | |
| 2093 coding->fake_multibyte = 1; \ | |
| 2094 } \ | |
| 2080 } \ | 2095 } \ |
| 2081 coding->consumed_char++; \ | 2096 coding->consumed_char++; \ |
| 2082 } while (0); | 2097 } while (0); |
| 2083 | 2098 |
| 2084 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2099 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 2153 | 2168 |
| 2154 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 2169 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
| 2155 unification_table = Vstandard_character_unification_table_for_decode; | 2170 unification_table = Vstandard_character_unification_table_for_decode; |
| 2156 | 2171 |
| 2157 coding->produced_char = 0; | 2172 coding->produced_char = 0; |
| 2173 coding->fake_multibyte = 0; | |
| 2158 while (src < src_end && (dst_bytes | 2174 while (src < src_end && (dst_bytes |
| 2159 ? (dst < adjusted_dst_end) | 2175 ? (dst < adjusted_dst_end) |
| 2160 : (dst < src - 3))) | 2176 : (dst < src - 3))) |
| 2161 { | 2177 { |
| 2162 /* SRC_BASE remembers the start position in source in each loop. | 2178 /* SRC_BASE remembers the start position in source in each loop. |
| 2201 *dst++ = c1; | 2217 *dst++ = c1; |
| 2202 coding->produced_char++; | 2218 coding->produced_char++; |
| 2203 } | 2219 } |
| 2204 else if (c1 < 0x80) | 2220 else if (c1 < 0x80) |
| 2205 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | 2221 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); |
| 2206 else if (c1 < 0xA0 || c1 >= 0xE0) | 2222 else if (c1 < 0xA0) |
| 2207 { | 2223 { |
| 2208 /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */ | 2224 /* SJIS -> JISX0208 */ |
| 2209 if (sjis_p) | 2225 if (sjis_p) |
| 2210 { | 2226 { |
| 2211 ONE_MORE_BYTE (c2); | 2227 ONE_MORE_BYTE (c2); |
| 2212 DECODE_SJIS (c1, c2, c3, c4); | 2228 if (c2 >= 0x40) |
| 2213 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | 2229 { |
| 2230 DECODE_SJIS (c1, c2, c3, c4); | |
| 2231 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
| 2232 } | |
| 2233 else | |
| 2234 goto label_invalid_code_2; | |
| 2214 } | 2235 } |
| 2215 else if (c1 >= 0xE0 && c1 < 0xFF) | 2236 else |
| 2216 { | 2237 goto label_invalid_code_1; |
| 2217 int charset; | 2238 } |
| 2218 | 2239 else if (c1 < 0xE0) |
| 2219 ONE_MORE_BYTE (c2); | |
| 2220 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
| 2221 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
| 2222 } | |
| 2223 else /* Invalid code */ | |
| 2224 { | |
| 2225 *dst++ = c1; | |
| 2226 coding->produced_char++; | |
| 2227 } | |
| 2228 } | |
| 2229 else | |
| 2230 { | 2240 { |
| 2231 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ | 2241 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ |
| 2232 if (sjis_p) | 2242 if (sjis_p) |
| 2233 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | 2243 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, |
| 2234 /* dummy */ c2); | 2244 /* dummy */ c2); |
| 2235 else | 2245 else |
| 2236 { | 2246 { |
| 2237 int charset; | 2247 int charset; |
| 2238 | 2248 |
| 2239 ONE_MORE_BYTE (c2); | 2249 ONE_MORE_BYTE (c2); |
| 2240 DECODE_BIG5 (c1, c2, charset, c3, c4); | 2250 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) |
| 2241 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2251 { |
| 2252 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
| 2253 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
| 2254 } | |
| 2255 else | |
| 2256 goto label_invalid_code_2; | |
| 2242 } | 2257 } |
| 2243 } | 2258 } |
| 2259 else /* C1 >= 0xE0 */ | |
| 2260 { | |
| 2261 /* SJIS -> JISX0208, BIG5 -> Big5 */ | |
| 2262 if (sjis_p) | |
| 2263 { | |
| 2264 ONE_MORE_BYTE (c2); | |
| 2265 if (c2 >= 0x40) | |
| 2266 { | |
| 2267 DECODE_SJIS (c1, c2, c3, c4); | |
| 2268 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
| 2269 } | |
| 2270 else | |
| 2271 goto label_invalid_code_2; | |
| 2272 } | |
| 2273 else | |
| 2274 { | |
| 2275 int charset; | |
| 2276 | |
| 2277 ONE_MORE_BYTE (c2); | |
| 2278 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | |
| 2279 { | |
| 2280 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
| 2281 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
| 2282 } | |
| 2283 else | |
| 2284 goto label_invalid_code_2; | |
| 2285 } | |
| 2286 } | |
| 2287 continue; | |
| 2288 | |
| 2289 label_invalid_code_1: | |
| 2290 *dst++ = c1; | |
| 2291 coding->produced_char++; | |
| 2292 coding->fake_multibyte = 1; | |
| 2293 continue; | |
| 2294 | |
| 2295 label_invalid_code_2: | |
| 2296 *dst++ = c1; *dst++= c2; | |
| 2297 coding->produced_char += 2; | |
| 2298 coding->fake_multibyte = 1; | |
| 2244 continue; | 2299 continue; |
| 2245 | 2300 |
| 2246 label_end_of_loop: | 2301 label_end_of_loop: |
| 2247 result = CODING_FINISH_INSUFFICIENT_SRC; | 2302 result = CODING_FINISH_INSUFFICIENT_SRC; |
| 2248 label_end_of_loop_2: | 2303 label_end_of_loop_2: |
| 2249 src = src_base; | 2304 src = src_base; |
| 2250 break; | 2305 break; |
| 2251 } | 2306 } |
| 2252 | 2307 |
| 2253 if (result == CODING_FINISH_NORMAL | 2308 if (src < src_end) |
| 2254 && src < src_end) | 2309 { |
| 2255 result = CODING_FINISH_INSUFFICIENT_DST; | 2310 if (result == CODING_FINISH_NORMAL) |
| 2311 result = CODING_FINISH_INSUFFICIENT_DST; | |
| 2312 else if (result != CODING_FINISH_INCONSISTENT_EOL | |
| 2313 && coding->mode & CODING_MODE_LAST_BLOCK) | |
| 2314 { | |
| 2315 src_bytes = src_end - src; | |
| 2316 if (dst_bytes && (dst_end - dst < src_bytes)) | |
| 2317 src_bytes = dst_end - dst; | |
| 2318 bcopy (dst, src, src_bytes); | |
| 2319 src += src_bytes; | |
| 2320 dst += src_bytes; | |
| 2321 coding->fake_multibyte = 1; | |
| 2322 } | |
| 2323 } | |
| 2256 | 2324 |
| 2257 coding->consumed = coding->consumed_char = src - source; | 2325 coding->consumed = coding->consumed_char = src - source; |
| 2258 coding->produced = dst - destination; | 2326 coding->produced = dst - destination; |
| 2259 return result; | 2327 return result; |
| 2260 } | 2328 } |
| 2289 | 2357 |
| 2290 if (!NILP (Venable_character_unification) && NILP (unification_table)) | 2358 if (!NILP (Venable_character_unification) && NILP (unification_table)) |
| 2291 unification_table = Vstandard_character_unification_table_for_encode; | 2359 unification_table = Vstandard_character_unification_table_for_encode; |
| 2292 | 2360 |
| 2293 coding->consumed_char = 0; | 2361 coding->consumed_char = 0; |
| 2362 coding->fake_multibyte = 0; | |
| 2294 while (src < src_end && (dst_bytes | 2363 while (src < src_end && (dst_bytes |
| 2295 ? (dst < adjusted_dst_end) | 2364 ? (dst < adjusted_dst_end) |
| 2296 : (dst < src - 1))) | 2365 : (dst < src - 1))) |
| 2297 { | 2366 { |
| 2298 /* SRC_BASE remembers the start position in source in each loop. | 2367 /* SRC_BASE remembers the start position in source in each loop. |
| 2400 { | 2469 { |
| 2401 unsigned char *src = source; | 2470 unsigned char *src = source; |
| 2402 unsigned char *src_end = source + src_bytes; | 2471 unsigned char *src_end = source + src_bytes; |
| 2403 unsigned char *dst = destination; | 2472 unsigned char *dst = destination; |
| 2404 unsigned char *dst_end = destination + dst_bytes; | 2473 unsigned char *dst_end = destination + dst_bytes; |
| 2474 unsigned char c; | |
| 2405 int result = CODING_FINISH_NORMAL; | 2475 int result = CODING_FINISH_NORMAL; |
| 2476 | |
| 2477 coding->fake_multibyte = 0; | |
| 2406 | 2478 |
| 2407 if (src_bytes <= 0) | 2479 if (src_bytes <= 0) |
| 2408 return result; | 2480 return result; |
| 2409 | 2481 |
| 2410 switch (coding->eol_type) | 2482 switch (coding->eol_type) |
| 2419 while (src < src_end && (dst_bytes | 2491 while (src < src_end && (dst_bytes |
| 2420 ? (dst < adjusted_dst_end) | 2492 ? (dst < adjusted_dst_end) |
| 2421 : (dst < src - 1))) | 2493 : (dst < src - 1))) |
| 2422 { | 2494 { |
| 2423 unsigned char *src_base = src; | 2495 unsigned char *src_base = src; |
| 2424 unsigned char c = *src++; | 2496 |
| 2497 c = *src++; | |
| 2425 if (c == '\r') | 2498 if (c == '\r') |
| 2426 { | 2499 { |
| 2427 ONE_MORE_BYTE (c); | 2500 ONE_MORE_BYTE (c); |
| 2428 if (c != '\n') | 2501 if (c != '\n') |
| 2429 { | 2502 { |
| 2431 { | 2504 { |
| 2432 result = CODING_FINISH_INCONSISTENT_EOL; | 2505 result = CODING_FINISH_INCONSISTENT_EOL; |
| 2433 goto label_end_of_loop_2; | 2506 goto label_end_of_loop_2; |
| 2434 } | 2507 } |
| 2435 *dst++ = '\r'; | 2508 *dst++ = '\r'; |
| 2509 if (BASE_LEADING_CODE_P (c)) | |
| 2510 coding->fake_multibyte = 1; | |
| 2436 } | 2511 } |
| 2437 *dst++ = c; | 2512 *dst++ = c; |
| 2438 } | 2513 } |
| 2439 else if (c == '\n' | 2514 else if (c == '\n' |
| 2440 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) | 2515 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) |
| 2441 { | 2516 { |
| 2442 result = CODING_FINISH_INCONSISTENT_EOL; | 2517 result = CODING_FINISH_INCONSISTENT_EOL; |
| 2443 goto label_end_of_loop_2; | 2518 goto label_end_of_loop_2; |
| 2444 } | 2519 } |
| 2445 else | 2520 else |
| 2446 *dst++ = c; | 2521 { |
| 2522 *dst++ = c; | |
| 2523 if (BASE_LEADING_CODE_P (c)) | |
| 2524 coding->fake_multibyte = 1; | |
| 2525 } | |
| 2447 continue; | 2526 continue; |
| 2448 | 2527 |
| 2449 label_end_of_loop: | 2528 label_end_of_loop: |
| 2450 result = CODING_FINISH_INSUFFICIENT_SRC; | 2529 result = CODING_FINISH_INSUFFICIENT_SRC; |
| 2451 label_end_of_loop_2: | 2530 label_end_of_loop_2: |
| 2459 break; | 2538 break; |
| 2460 | 2539 |
| 2461 case CODING_EOL_CR: | 2540 case CODING_EOL_CR: |
| 2462 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | 2541 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) |
| 2463 { | 2542 { |
| 2464 while (src < src_end) if (*src++ == '\n') break; | 2543 while (src < src_end) |
| 2544 { | |
| 2545 if ((c = *src++) == '\n') | |
| 2546 break; | |
| 2547 if (BASE_LEADING_CODE_P (c)) | |
| 2548 coding->fake_multibyte = 1; | |
| 2549 } | |
| 2465 if (*--src == '\n') | 2550 if (*--src == '\n') |
| 2466 { | 2551 { |
| 2467 src_bytes = src - source; | 2552 src_bytes = src - source; |
| 2468 result = CODING_FINISH_INCONSISTENT_EOL; | 2553 result = CODING_FINISH_INCONSISTENT_EOL; |
| 2469 } | 2554 } |
| 2491 bcopy (source, destination, src_bytes); | 2576 bcopy (source, destination, src_bytes); |
| 2492 else | 2577 else |
| 2493 safe_bcopy (source, destination, src_bytes); | 2578 safe_bcopy (source, destination, src_bytes); |
| 2494 src += src_bytes; | 2579 src += src_bytes; |
| 2495 dst += dst_bytes; | 2580 dst += dst_bytes; |
| 2581 coding->fake_multibyte = 1; | |
| 2496 break; | 2582 break; |
| 2497 } | 2583 } |
| 2498 | 2584 |
| 2499 coding->consumed = coding->consumed_char = src - source; | 2585 coding->consumed = coding->consumed_char = src - source; |
| 2500 coding->produced = coding->produced_char = dst - destination; | 2586 coding->produced = coding->produced_char = dst - destination; |
| 2512 int src_bytes, dst_bytes; | 2598 int src_bytes, dst_bytes; |
| 2513 { | 2599 { |
| 2514 unsigned char *src = source; | 2600 unsigned char *src = source; |
| 2515 unsigned char *dst = destination; | 2601 unsigned char *dst = destination; |
| 2516 int result = CODING_FINISH_NORMAL; | 2602 int result = CODING_FINISH_NORMAL; |
| 2603 | |
| 2604 coding->fake_multibyte = 0; | |
| 2517 | 2605 |
| 2518 if (coding->eol_type == CODING_EOL_CRLF) | 2606 if (coding->eol_type == CODING_EOL_CRLF) |
| 2519 { | 2607 { |
| 2520 unsigned char c; | 2608 unsigned char c; |
| 2521 unsigned char *src_end = source + src_bytes; | 2609 unsigned char *src_end = source + src_bytes; |
| 2532 c = *src++; | 2620 c = *src++; |
| 2533 if (c == '\n' | 2621 if (c == '\n' |
| 2534 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) | 2622 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) |
| 2535 *dst++ = '\r', *dst++ = '\n'; | 2623 *dst++ = '\r', *dst++ = '\n'; |
| 2536 else | 2624 else |
| 2537 *dst++ = c; | 2625 { |
| 2626 *dst++ = c; | |
| 2627 if (BASE_LEADING_CODE_P (c)) | |
| 2628 coding->fake_multibyte = 1; | |
| 2629 } | |
| 2538 } | 2630 } |
| 2539 if (src < src_end) | 2631 if (src < src_end) |
| 2540 result = CODING_FINISH_INSUFFICIENT_DST; | 2632 result = CODING_FINISH_INSUFFICIENT_DST; |
| 2541 } | 2633 } |
| 2542 else | 2634 else |
| 2543 { | 2635 { |
| 2636 unsigned char c; | |
| 2637 | |
| 2544 if (dst_bytes && src_bytes > dst_bytes) | 2638 if (dst_bytes && src_bytes > dst_bytes) |
| 2545 { | 2639 { |
| 2546 src_bytes = dst_bytes; | 2640 src_bytes = dst_bytes; |
| 2547 result = CODING_FINISH_INSUFFICIENT_DST; | 2641 result = CODING_FINISH_INSUFFICIENT_DST; |
| 2548 } | 2642 } |
| 2549 if (dst_bytes) | 2643 if (dst_bytes) |
| 2550 bcopy (source, destination, src_bytes); | 2644 bcopy (source, destination, src_bytes); |
| 2551 else | 2645 else |
| 2552 safe_bcopy (source, destination, src_bytes); | 2646 { |
| 2647 safe_bcopy (source, destination, src_bytes); | |
| 2648 dst_bytes = src_bytes; | |
| 2649 } | |
| 2553 if (coding->eol_type == CODING_EOL_CRLF) | 2650 if (coding->eol_type == CODING_EOL_CRLF) |
| 2554 { | 2651 { |
| 2555 while (src_bytes--) | 2652 while (src_bytes--) |
| 2556 if (*dst++ == '\n') dst[-1] = '\r'; | 2653 { |
| 2557 } | 2654 if ((c = *dst++) == '\n') |
| 2558 else if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) | 2655 dst[-1] = '\r'; |
| 2559 { | 2656 else if (BASE_LEADING_CODE_P (c)) |
| 2560 while (src_bytes--) | 2657 coding->fake_multibyte = 1; |
| 2561 if (*dst++ == '\r') dst[-1] = '\n'; | 2658 } |
| 2562 } | 2659 } |
| 2563 src += src_bytes; | 2660 else |
| 2564 dst += src_bytes; | 2661 { |
| 2662 if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) | |
| 2663 { | |
| 2664 while (src_bytes--) | |
| 2665 if (*dst++ == '\r') dst[-1] = '\n'; | |
| 2666 } | |
| 2667 coding->fake_multibyte = 1; | |
| 2668 } | |
| 2669 src = source + dst_bytes; | |
| 2670 dst = destination + dst_bytes; | |
| 2565 } | 2671 } |
| 2566 | 2672 |
| 2567 coding->consumed = coding->consumed_char = src - source; | 2673 coding->consumed = coding->consumed_char = src - source; |
| 2568 coding->produced = coding->produced_char = dst - destination; | 2674 coding->produced = coding->produced_char = dst - destination; |
| 2569 return result; | 2675 return result; |
| 3456 | 3562 |
| 3457 if (src_bytes <= 0) | 3563 if (src_bytes <= 0) |
| 3458 { | 3564 { |
| 3459 coding->produced = coding->produced_char = 0; | 3565 coding->produced = coding->produced_char = 0; |
| 3460 coding->consumed = coding->consumed_char = 0; | 3566 coding->consumed = coding->consumed_char = 0; |
| 3567 coding->fake_multibyte = 0; | |
| 3461 return CODING_FINISH_NORMAL; | 3568 return CODING_FINISH_NORMAL; |
| 3462 } | 3569 } |
| 3463 | 3570 |
| 3464 if (coding->type == coding_type_undecided) | 3571 if (coding->type == coding_type_undecided) |
| 3465 detect_coding (coding, source, src_bytes); | 3572 detect_coding (coding, source, src_bytes); |
| 3512 } | 3619 } |
| 3513 if (dst_bytes) | 3620 if (dst_bytes) |
| 3514 bcopy (source, destination, coding->produced); | 3621 bcopy (source, destination, coding->produced); |
| 3515 else | 3622 else |
| 3516 safe_bcopy (source, destination, coding->produced); | 3623 safe_bcopy (source, destination, coding->produced); |
| 3624 coding->fake_multibyte = 1; | |
| 3517 coding->consumed | 3625 coding->consumed |
| 3518 = coding->consumed_char = coding->produced_char = coding->produced; | 3626 = coding->consumed_char = coding->produced_char = coding->produced; |
| 3519 break; | 3627 break; |
| 3520 } | 3628 } |
| 3521 | 3629 |
| 3534 | 3642 |
| 3535 if (src_bytes <= 0) | 3643 if (src_bytes <= 0) |
| 3536 { | 3644 { |
| 3537 coding->produced = coding->produced_char = 0; | 3645 coding->produced = coding->produced_char = 0; |
| 3538 coding->consumed = coding->consumed_char = 0; | 3646 coding->consumed = coding->consumed_char = 0; |
| 3647 coding->fake_multibyte = 0; | |
| 3539 return CODING_FINISH_NORMAL; | 3648 return CODING_FINISH_NORMAL; |
| 3540 } | 3649 } |
| 3541 | 3650 |
| 3542 switch (coding->type) | 3651 switch (coding->type) |
| 3543 { | 3652 { |
| 3590 { | 3699 { |
| 3591 unsigned char *p = destination, *pend = p + coding->produced; | 3700 unsigned char *p = destination, *pend = p + coding->produced; |
| 3592 while (p < pend) | 3701 while (p < pend) |
| 3593 if (*p++ == '\015') p[-1] = '\n'; | 3702 if (*p++ == '\015') p[-1] = '\n'; |
| 3594 } | 3703 } |
| 3704 coding->fake_multibyte = 1; | |
| 3595 coding->consumed | 3705 coding->consumed |
| 3596 = coding->consumed_char = coding->produced_char = coding->produced; | 3706 = coding->consumed_char = coding->produced_char = coding->produced; |
| 3597 break; | 3707 break; |
| 3598 } | 3708 } |
| 3599 | 3709 |
| 3600 return result; | 3710 return result; |
| 3601 } | 3711 } |
| 3602 | 3712 |
| 3603 /* Scan text in the region between *BEG and *END, skip characters | 3713 /* Scan text in the region between *BEG and *END (byte positions), |
| 3604 which we don't have to decode by coding system CODING at the head | 3714 skip characters which we don't have to decode by coding system |
| 3605 and tail, then set *BEG and *END to the region of the text we | 3715 CODING at the head and tail, then set *BEG and *END to the region |
| 3606 actually have to convert. | 3716 of the text we actually have to convert. The caller should move |
| 3717 the gap out of the region in advance. | |
| 3607 | 3718 |
| 3608 If STR is not NULL, *BEG and *END are indices into STR. */ | 3719 If STR is not NULL, *BEG and *END are indices into STR. */ |
| 3609 | 3720 |
| 3610 static void | 3721 static void |
| 3611 shrink_decoding_region (beg, end, coding, str) | 3722 shrink_decoding_region (beg, end, coding, str) |
| 3612 int *beg, *end; | 3723 int *beg, *end; |
| 3613 struct coding_system *coding; | 3724 struct coding_system *coding; |
| 3614 unsigned char *str; | 3725 unsigned char *str; |
| 3615 { | 3726 { |
| 3616 unsigned char *begp_orig, *begp, *endp_orig, *endp; | 3727 unsigned char *begp_orig, *begp, *endp_orig, *endp, c; |
| 3617 int eol_conversion; | 3728 int eol_conversion; |
| 3618 | 3729 |
| 3619 if (coding->type == coding_type_ccl | 3730 if (coding->type == coding_type_ccl |
| 3620 || coding->type == coding_type_undecided | 3731 || coding->type == coding_type_undecided |
| 3621 || !NILP (coding->post_read_conversion)) | 3732 || !NILP (coding->post_read_conversion)) |
| 3623 /* We can't skip any data. */ | 3734 /* We can't skip any data. */ |
| 3624 return; | 3735 return; |
| 3625 } | 3736 } |
| 3626 else if (coding->type == coding_type_no_conversion) | 3737 else if (coding->type == coding_type_no_conversion) |
| 3627 { | 3738 { |
| 3628 /* We need no conversion. */ | 3739 /* We need no conversion, but don't have to skip any data here. |
| 3629 *beg = *end; | 3740 Decoding routine handles them effectively anyway. */ |
| 3630 return; | 3741 return; |
| 3631 } | 3742 } |
| 3632 | 3743 |
| 3633 if (coding->heading_ascii >= 0) | 3744 if (coding->heading_ascii >= 0) |
| 3634 /* Detection routine has already found how much we can skip at the | 3745 /* Detection routine has already found how much we can skip at the |
| 3640 begp_orig = begp = str + *beg; | 3751 begp_orig = begp = str + *beg; |
| 3641 endp_orig = endp = str + *end; | 3752 endp_orig = endp = str + *end; |
| 3642 } | 3753 } |
| 3643 else | 3754 else |
| 3644 { | 3755 { |
| 3645 move_gap (*beg); | 3756 begp_orig = begp = BYTE_POS_ADDR (*beg); |
| 3646 begp_orig = begp = GAP_END_ADDR; | |
| 3647 endp_orig = endp = begp + *end - *beg; | 3757 endp_orig = endp = begp + *end - *beg; |
| 3648 } | 3758 } |
| 3649 | 3759 |
| 3650 eol_conversion = (coding->eol_type != CODING_EOL_LF); | 3760 eol_conversion = (coding->eol_type != CODING_EOL_LF); |
| 3651 | 3761 |
| 3654 case coding_type_emacs_mule: | 3764 case coding_type_emacs_mule: |
| 3655 case coding_type_raw_text: | 3765 case coding_type_raw_text: |
| 3656 if (eol_conversion) | 3766 if (eol_conversion) |
| 3657 { | 3767 { |
| 3658 if (coding->heading_ascii < 0) | 3768 if (coding->heading_ascii < 0) |
| 3659 while (begp < endp && *begp != '\r') begp++; | 3769 while (begp < endp && *begp != '\r' && *begp < 0x80) begp++; |
| 3660 while (begp < endp && *(endp - 1) != '\r') endp--; | 3770 while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80) |
| 3771 endp--; | |
| 3661 } | 3772 } |
| 3662 else | 3773 else |
| 3663 begp = endp; | 3774 begp = endp; |
| 3664 break; | 3775 break; |
| 3665 | 3776 |
| 3684 break; | 3795 break; |
| 3685 | 3796 |
| 3686 default: /* i.e. case coding_type_iso2022: */ | 3797 default: /* i.e. case coding_type_iso2022: */ |
| 3687 if (coding->heading_ascii < 0) | 3798 if (coding->heading_ascii < 0) |
| 3688 { | 3799 { |
| 3689 unsigned char c; | |
| 3690 | |
| 3691 /* We can skip all ASCII characters at the head except for a | 3800 /* We can skip all ASCII characters at the head except for a |
| 3692 few control codes. */ | 3801 few control codes. */ |
| 3693 while (begp < endp && (c = *begp) < 0x80 | 3802 while (begp < endp && (c = *begp) < 0x80 |
| 3694 && c != ISO_CODE_CR && c != ISO_CODE_SO | 3803 && c != ISO_CODE_CR && c != ISO_CODE_SO |
| 3695 && c != ISO_CODE_SI && c != ISO_CODE_ESC | 3804 && c != ISO_CODE_SI && c != ISO_CODE_ESC |
| 3700 { | 3809 { |
| 3701 case CODING_CATEGORY_IDX_ISO_8_1: | 3810 case CODING_CATEGORY_IDX_ISO_8_1: |
| 3702 case CODING_CATEGORY_IDX_ISO_8_2: | 3811 case CODING_CATEGORY_IDX_ISO_8_2: |
| 3703 /* We can skip all ASCII characters at the tail. */ | 3812 /* We can skip all ASCII characters at the tail. */ |
| 3704 if (eol_conversion) | 3813 if (eol_conversion) |
| 3705 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--; | 3814 while (begp < endp && (c = endp[-1]) < 0x80 && c != '\n') endp--; |
| 3706 else | 3815 else |
| 3707 while (begp < endp && endp[-1] < 0x80) endp--; | 3816 while (begp < endp && endp[-1] < 0x80) endp--; |
| 3708 break; | 3817 break; |
| 3709 | 3818 |
| 3710 case CODING_CATEGORY_IDX_ISO_7: | 3819 case CODING_CATEGORY_IDX_ISO_7: |
| 3711 case CODING_CATEGORY_IDX_ISO_7_TIGHT: | 3820 case CODING_CATEGORY_IDX_ISO_7_TIGHT: |
| 3712 /* We can skip all characters at the tail except for ESC and | 3821 /* We can skip all characters at the tail except for ESC and |
| 3713 the following 2-byte at the tail. */ | 3822 the following 2-byte at the tail. */ |
| 3714 if (eol_conversion) | 3823 if (eol_conversion) |
| 3715 while (begp < endp && endp[-1] != ISO_CODE_ESC && endp[-1] != '\n') | 3824 while (begp < endp |
| 3825 && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\n') | |
| 3716 endp--; | 3826 endp--; |
| 3717 else | 3827 else |
| 3718 while (begp < endp && endp[-1] != ISO_CODE_ESC) | 3828 while (begp < endp |
| 3829 && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC) | |
| 3719 endp--; | 3830 endp--; |
| 3720 if (begp < endp && endp[-1] == ISO_CODE_ESC) | 3831 if (begp < endp && endp[-1] == ISO_CODE_ESC) |
| 3721 { | 3832 { |
| 3722 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') | 3833 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') |
| 3723 /* This is an ASCII designation sequence. We can | 3834 /* This is an ASCII designation sequence. We can |
| 3760 begp_orig = begp = str + *beg; | 3871 begp_orig = begp = str + *beg; |
| 3761 endp_orig = endp = str + *end; | 3872 endp_orig = endp = str + *end; |
| 3762 } | 3873 } |
| 3763 else | 3874 else |
| 3764 { | 3875 { |
| 3765 move_gap (*beg); | 3876 begp_orig = begp = BYTE_POS_ADDR (*beg); |
| 3766 begp_orig = begp = GAP_END_ADDR; | |
| 3767 endp_orig = endp = begp + *end - *beg; | 3877 endp_orig = endp = begp + *end - *beg; |
| 3768 } | 3878 } |
| 3769 | 3879 |
| 3770 eol_conversion = (coding->eol_type == CODING_EOL_CR | 3880 eol_conversion = (coding->eol_type == CODING_EOL_CR |
| 3771 || coding->eol_type == CODING_EOL_CRLF); | 3881 || coding->eol_type == CODING_EOL_CRLF); |
| 3819 *end += endp - endp_orig; | 3929 *end += endp - endp_orig; |
| 3820 return; | 3930 return; |
| 3821 } | 3931 } |
| 3822 | 3932 |
| 3823 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the | 3933 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the |
| 3824 text from FROM to TO by coding system CODING, and return number of | 3934 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by |
| 3825 characters in the resulting text. | 3935 coding system CODING, and return the status code of code conversion |
| 3936 (currently, this value has no meaning). | |
| 3937 | |
| 3938 How many characters (and bytes) are converted to how many | |
| 3939 characters (and bytes) are recorded in members of the structure | |
| 3940 CODING. | |
| 3826 | 3941 |
| 3827 If ADJUST is nonzero, we do various things as if the original text | 3942 If ADJUST is nonzero, we do various things as if the original text |
| 3828 is deleted and a new text is inserted. See the comments in | 3943 is deleted and a new text is inserted. See the comments in |
| 3829 replace_range (insdel.c) to know what we are doing. | 3944 replace_range (insdel.c) to know what we are doing. |
| 3830 | 3945 |
| 3831 ADJUST nonzero also means that post-read-conversion or | 3946 ADJUST nonzero also means that post-read-conversion or |
| 3832 pre-write-conversion functions (if any) should be processed. */ | 3947 pre-write-conversion functions (if any) should be processed. */ |
| 3833 | 3948 |
| 3834 int | 3949 int |
| 3835 code_convert_region (from, to, coding, encodep, adjust) | 3950 code_convert_region (from, from_byte, to, to_byte, coding, encodep, adjust) |
| 3836 int from, to, encodep, adjust; | 3951 int from, from_byte, to, to_byte, encodep, adjust; |
| 3837 struct coding_system *coding; | 3952 struct coding_system *coding; |
| 3838 { | 3953 { |
| 3839 int len = to - from, require, inserted, inserted_byte; | 3954 int len = to - from, len_byte = to_byte - from_byte; |
| 3840 int from_byte, to_byte, len_byte; | 3955 int require, inserted, inserted_byte; |
| 3841 int from_byte_orig, to_byte_orig; | 3956 int from_byte_orig, to_byte_orig; |
| 3842 Lisp_Object saved_coding_symbol = Qnil; | 3957 Lisp_Object saved_coding_symbol = Qnil; |
| 3958 int multibyte = !NILP (current_buffer->enable_multibyte_characters); | |
| 3959 int first = 1; | |
| 3960 int fake_multibyte = 0; | |
| 3961 unsigned char *src, *dst; | |
| 3843 | 3962 |
| 3844 if (adjust) | 3963 if (adjust) |
| 3845 { | 3964 { |
| 3965 int saved_from = from; | |
| 3966 | |
| 3846 prepare_to_modify_buffer (from, to, &from); | 3967 prepare_to_modify_buffer (from, to, &from); |
| 3847 to = from + len; | 3968 if (saved_from != from) |
| 3848 } | 3969 { |
| 3849 from_byte = CHAR_TO_BYTE (from); to_byte = CHAR_TO_BYTE (to); | 3970 to = from + len; |
| 3850 len_byte = to_byte - from_byte; | 3971 if (multibyte) |
| 3972 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to); | |
| 3973 else | |
| 3974 from_byte = from, to_byte = to; | |
| 3975 len_byte = to_byte - from_byte; | |
| 3976 } | |
| 3977 } | |
| 3851 | 3978 |
| 3852 if (! encodep && CODING_REQUIRE_DETECTION (coding)) | 3979 if (! encodep && CODING_REQUIRE_DETECTION (coding)) |
| 3853 { | 3980 { |
| 3854 /* We must detect encoding of text and eol. Even if detection | 3981 /* We must detect encoding of text and eol. Even if detection |
| 3855 routines can't decide the encoding, we should not let them | 3982 routines can't decide the encoding, we should not let them |
| 3858 | 3985 |
| 3859 if (from < GPT && to > GPT) | 3986 if (from < GPT && to > GPT) |
| 3860 move_gap_both (from, from_byte); | 3987 move_gap_both (from, from_byte); |
| 3861 if (coding->type == coding_type_undecided) | 3988 if (coding->type == coding_type_undecided) |
| 3862 { | 3989 { |
| 3863 detect_coding (coding, BYTE_POS_ADDR (from), len); | 3990 detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte); |
| 3864 if (coding->type == coding_type_undecided) | 3991 if (coding->type == coding_type_undecided) |
| 3865 coding->type = coding_type_emacs_mule; | 3992 coding->type = coding_type_emacs_mule; |
| 3866 } | 3993 } |
| 3867 if (coding->eol_type == CODING_EOL_UNDECIDED) | 3994 if (coding->eol_type == CODING_EOL_UNDECIDED) |
| 3868 { | 3995 { |
| 3874 encounter an inconsistent eol format while decoding. */ | 4001 encounter an inconsistent eol format while decoding. */ |
| 3875 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; | 4002 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; |
| 3876 } | 4003 } |
| 3877 } | 4004 } |
| 3878 | 4005 |
| 4006 coding->consumed_char = len, coding->consumed = len_byte; | |
| 4007 | |
| 3879 if (encodep | 4008 if (encodep |
| 3880 ? ! CODING_REQUIRE_ENCODING (coding) | 4009 ? ! CODING_REQUIRE_ENCODING (coding) |
| 3881 : ! CODING_REQUIRE_DECODING (coding)) | 4010 : ! CODING_REQUIRE_DECODING (coding)) |
| 3882 return len; | 4011 { |
| 4012 coding->produced = len_byte; | |
| 4013 if (multibyte) | |
| 4014 { | |
| 4015 if (GPT < from || GPT > to) | |
| 4016 move_gap_both (from, from_byte); | |
| 4017 coding->produced_char | |
| 4018 = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte); | |
| 4019 if (coding->produced_char != len) | |
| 4020 { | |
| 4021 int diff = coding->produced_char - len; | |
| 4022 | |
| 4023 if (adjust) | |
| 4024 adjust_before_replace (from, from_byte, to, to_byte); | |
| 4025 ZV += diff; Z += diff; GPT += diff; | |
| 4026 if (adjust) | |
| 4027 adjust_after_replace (from, from_byte, to, to_byte, | |
| 4028 diff, 0); | |
| 4029 } | |
| 4030 } | |
| 4031 else | |
| 4032 coding->produced_char = len_byte; | |
| 4033 return 0; | |
| 4034 } | |
| 3883 | 4035 |
| 3884 /* Now we convert the text. */ | 4036 /* Now we convert the text. */ |
| 3885 | 4037 |
| 3886 /* For encoding, we must process pre-write-conversion in advance. */ | 4038 /* For encoding, we must process pre-write-conversion in advance. */ |
| 3887 if (encodep | 4039 if (encodep |
| 3898 if (current_buffer != prev) | 4050 if (current_buffer != prev) |
| 3899 { | 4051 { |
| 3900 len = ZV - BEGV; | 4052 len = ZV - BEGV; |
| 3901 new = current_buffer; | 4053 new = current_buffer; |
| 3902 set_buffer_internal_1 (prev); | 4054 set_buffer_internal_1 (prev); |
| 3903 del_range (from, to); | 4055 del_range_2 (from, to, from_byte, to_byte); |
| 3904 insert_from_buffer (new, BEG, len, 0); | 4056 insert_from_buffer (new, BEG, len, 0); |
| 3905 to = from + len; | 4057 to = from + len; |
| 3906 to_byte = CHAR_TO_BYTE (to); | 4058 to_byte = multibyte ? CHAR_TO_BYTE (to) : to; |
| 3907 len_byte = to_byte - from_byte; | 4059 len_byte = to_byte - from_byte; |
| 3908 } | 4060 } |
| 3909 } | 4061 } |
| 3910 | 4062 |
| 3911 /* Try to skip the heading and tailing ASCIIs. */ | 4063 /* Try to skip the heading and tailing ASCIIs. */ |
| 3912 from_byte_orig = from_byte; to_byte_orig = to_byte; | 4064 from_byte_orig = from_byte; to_byte_orig = to_byte; |
| 4065 if (from < GPT && GPT < to) | |
| 4066 move_gap (from); | |
| 3913 if (encodep) | 4067 if (encodep) |
| 3914 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); | 4068 shrink_encoding_region (&from_byte, &to_byte, coding, NULL); |
| 3915 else | 4069 else |
| 3916 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); | 4070 shrink_decoding_region (&from_byte, &to_byte, coding, NULL); |
| 3917 if (from_byte == to_byte) | 4071 if (from_byte == to_byte) |
| 3918 return len; | 4072 { |
| 4073 coding->produced = len_byte; | |
| 4074 coding->produced_char = multibyte ? len : len_byte; | |
| 4075 return 0; | |
| 4076 } | |
| 4077 | |
| 3919 /* Here, the excluded region by shrinking contains only ASCIIs. */ | 4078 /* Here, the excluded region by shrinking contains only ASCIIs. */ |
| 3920 from += (from_byte - from_byte_orig); | 4079 from += (from_byte - from_byte_orig); |
| 3921 to += (to_byte - to_byte_orig); | 4080 to += (to_byte - to_byte_orig); |
| 3922 len = to - from; | 4081 len = to - from; |
| 3923 len_byte = to_byte - from_byte; | 4082 len_byte = to_byte - from_byte; |
| 3924 | 4083 |
| 3925 /* For conversion, we must put the gap before the text to be decoded | 4084 /* For conversion, we must put the gap before the text in addition to |
| 3926 in addition to make the gap larger for efficient decoding. The | 4085 making the gap larger for efficient decoding. The required gap |
| 3927 required gap size starts from 2000 which is the magic number used | 4086 size starts from 2000 which is the magic number used in make_gap. |
| 3928 in make_gap. But, after one batch of conversion, it will be | 4087 But, after one batch of conversion, it will be incremented if we |
| 3929 incremented if we find that it is not enough. */ | 4088 find that it is not enough. */ |
| 3930 require = 2000; | 4089 require = 2000; |
| 3931 | 4090 |
| 3932 if (GAP_SIZE < require) | 4091 if (GAP_SIZE < require) |
| 3933 make_gap (require - GAP_SIZE); | 4092 make_gap (require - GAP_SIZE); |
| 3934 move_gap_both (from, from_byte); | 4093 move_gap_both (from, from_byte); |
| 3940 beg_unchanged = GPT - BEG; | 4099 beg_unchanged = GPT - BEG; |
| 3941 if (Z - GPT < end_unchanged) | 4100 if (Z - GPT < end_unchanged) |
| 3942 end_unchanged = Z - GPT; | 4101 end_unchanged = Z - GPT; |
| 3943 | 4102 |
| 3944 inserted = inserted_byte = 0; | 4103 inserted = inserted_byte = 0; |
| 4104 src = GAP_END_ADDR, dst = GPT_ADDR; | |
| 4105 | |
| 4106 GAP_SIZE += len_byte; | |
| 4107 ZV -= len; | |
| 4108 Z -= len; | |
| 4109 ZV_BYTE -= len_byte; | |
| 4110 Z_BYTE -= len_byte; | |
| 4111 | |
| 3945 for (;;) | 4112 for (;;) |
| 3946 { | 4113 { |
| 3947 int result, diff_char, diff_byte; | 4114 int result; |
| 3948 | 4115 |
| 3949 /* The buffer memory is changed from: | 4116 /* The buffer memory is changed from: |
| 3950 +--------+converted-text+------------+-----original-text-----+---+ | 4117 +--------+converted-text+---------+-------original-text------+---+ |
| 3951 |<-from->|<--inserted-->|<-GAP_SIZE->|<---------len--------->|---| */ | 4118 |<-from->|<--inserted-->|---------|<-----------len---------->|---| |
| 3952 | 4119 |<------------------- GAP_SIZE -------------------->| */ |
| 3953 if (encodep) | 4120 if (encodep) |
| 3954 result = encode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); | 4121 result = encode_coding (coding, src, dst, len_byte, 0); |
| 3955 else | 4122 else |
| 3956 result = decode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); | 4123 result = decode_coding (coding, src, dst, len_byte, 0); |
| 3957 /* to: | 4124 /* to: |
| 3958 +--------+-------converted-text--------+--+---original-text--+---+ | 4125 +--------+-------converted-text--------+--+---original-text--+---+ |
| 3959 |<-from->|<----(inserted+produced)---->|--|<-(len-consumed)->|---| */ | 4126 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| |
| 3960 | 4127 |<------------------- GAP_SIZE -------------------->| */ |
| 3961 diff_char = coding->produced_char - coding->consumed_char; | 4128 if (coding->fake_multibyte) |
| 3962 diff_byte = coding->produced - coding->consumed; | 4129 fake_multibyte = 1; |
| 3963 | 4130 |
| 3964 GAP_SIZE -= diff_byte; | 4131 if (!encodep && !multibyte) |
| 3965 ZV += diff_char; ZV_BYTE += diff_byte; | 4132 coding->produced_char = coding->produced; |
| 3966 Z += diff_char; Z_BYTE += diff_byte; | |
| 3967 GPT += coding->produced_char; GPT_BYTE += coding->produced; | |
| 3968 | |
| 3969 inserted += coding->produced_char; | 4133 inserted += coding->produced_char; |
| 3970 inserted_byte += coding->produced; | 4134 inserted_byte += coding->produced; |
| 3971 len -= coding->consumed_char; | |
| 3972 len_byte -= coding->consumed; | 4135 len_byte -= coding->consumed; |
| 4136 src += coding->consumed; | |
| 4137 dst += inserted_byte; | |
| 3973 | 4138 |
| 3974 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) | 4139 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) |
| 3975 { | 4140 { |
| 3976 unsigned char *p = GPT_ADDR - inserted_byte, *pend = GPT_ADDR; | 4141 unsigned char *pend = dst, *p = pend - inserted_byte; |
| 3977 | 4142 |
| 3978 /* Encode LFs back to the original eol format (CR or CRLF). */ | 4143 /* Encode LFs back to the original eol format (CR or CRLF). */ |
| 3979 if (coding->eol_type == CODING_EOL_CR) | 4144 if (coding->eol_type == CODING_EOL_CR) |
| 3980 { | 4145 { |
| 3981 while (p < pend) if (*p++ == '\n') p[-1] = '\r'; | 4146 while (p < pend) if (*p++ == '\n') p[-1] = '\r'; |
| 3982 } | 4147 } |
| 3983 else | 4148 else |
| 3984 { | 4149 { |
| 3985 unsigned char *p2 = p; | |
| 3986 int count = 0; | 4150 int count = 0; |
| 3987 | 4151 |
| 3988 while (p2 < pend) if (*p2++ == '\n') count++; | 4152 while (p < pend) if (*p++ == '\n') count++; |
| 3989 if (GAP_SIZE < count) | 4153 if (src - dst < count) |
| 3990 make_gap (count - GAP_SIZE); | |
| 3991 p2 = GPT_ADDR + count; | |
| 3992 while (p < pend) | |
| 3993 { | 4154 { |
| 3994 *--p2 = *--pend; | 4155 /* We don't have sufficient room for putting LFs |
| 3995 if (*pend == '\n') *--p2 = '\r'; | 4156 back to CRLF. We must record converted and |
| 4157 not-yet-converted text back to the buffer | |
| 4158 content, enlarge the gap, then record them out of | |
| 4159 the buffer contents again. */ | |
| 4160 int add = len_byte + inserted_byte; | |
| 4161 | |
| 4162 GAP_SIZE -= add; | |
| 4163 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | |
| 4164 GPT += inserted_byte; GPT_BYTE += inserted_byte; | |
| 4165 make_gap (count - GAP_SIZE); | |
| 4166 GAP_SIZE += add; | |
| 4167 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | |
| 4168 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | |
| 4169 /* Don't forget to update SRC, DST, and PEND. */ | |
| 4170 src = GAP_END_ADDR - len_byte; | |
| 4171 dst = GPT_ADDR + inserted_byte; | |
| 4172 pend = dst; | |
| 3996 } | 4173 } |
| 3997 GPT += count; GAP_SIZE -= count; ZV += count; Z += count; | |
| 3998 ZV_BYTE += count; Z_BYTE += count; | |
| 3999 coding->produced += count; | |
| 4000 coding->produced_char += count; | |
| 4001 inserted += count; | 4174 inserted += count; |
| 4002 inserted_byte += count; | 4175 inserted_byte += count; |
| 4176 coding->produced += count; | |
| 4177 p = dst = pend + count; | |
| 4178 while (count) | |
| 4179 { | |
| 4180 *--p = *--pend; | |
| 4181 if (*p == '\n') count--, *--p = '\r'; | |
| 4182 } | |
| 4003 } | 4183 } |
| 4004 | 4184 |
| 4005 /* Suppress eol-format conversion in the further conversion. */ | 4185 /* Suppress eol-format conversion in the further conversion. */ |
| 4006 coding->eol_type = CODING_EOL_LF; | 4186 coding->eol_type = CODING_EOL_LF; |
| 4007 | 4187 |
| 4008 /* Restore the original symbol. */ | 4188 /* Restore the original symbol. */ |
| 4009 coding->symbol = saved_coding_symbol; | 4189 coding->symbol = saved_coding_symbol; |
| 4190 | |
| 4191 continue; | |
| 4010 } | 4192 } |
| 4011 if (len_byte <= 0) | 4193 if (len_byte <= 0) |
| 4012 break; | 4194 break; |
| 4013 if (result == CODING_FINISH_INSUFFICIENT_SRC) | 4195 if (result == CODING_FINISH_INSUFFICIENT_SRC) |
| 4014 { | 4196 { |
| 4015 /* The source text ends in invalid codes. Let's just | 4197 /* The source text ends in invalid codes. Let's just |
| 4016 make them valid buffer contents, and finish conversion. */ | 4198 make them valid buffer contents, and finish conversion. */ |
| 4017 inserted += len; | 4199 inserted += len_byte; |
| 4018 inserted_byte += len_byte; | 4200 inserted_byte += len_byte; |
| 4201 while (len_byte--) | |
| 4202 *src++ = *dst++; | |
| 4203 fake_multibyte = 1; | |
| 4019 break; | 4204 break; |
| 4020 } | 4205 } |
| 4021 if (inserted == coding->produced_char) | 4206 if (first) |
| 4022 /* We have just done the first batch of conversion. Let's | 4207 { |
| 4023 reconsider the required gap size now. | 4208 /* We have just done the first batch of conversion which was |
| 4024 | 4209 stopped because of insufficient gap. Let's reconsider the |
| 4025 We have converted CONSUMED bytes into PRODUCED bytes. To | 4210 required gap size (i.e. SRC - DST) now. |
| 4026 convert the remaining LEN bytes, we may need REQUIRE bytes | 4211 |
| 4027 of gap, where: | 4212 We have converted ORIG bytes (== coding->consumed) into |
| 4028 REQUIRE + LEN = (LEN * PRODUCED / CONSUMED) | 4213 NEW bytes (coding->produced). To convert the remaining |
| 4029 REQUIRE = LEN * (PRODUCED - CONSUMED) / CONSUMED | 4214 LEN bytes, we may need REQUIRE bytes of gap, where: |
| 4030 = LEN * DIFF / CONSUMED | 4215 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) |
| 4031 Here, we are sure that DIFF is positive. */ | 4216 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG |
| 4032 require = len_byte * diff_byte / coding->consumed; | 4217 Here, we are sure that NEW >= ORIG. */ |
| 4033 if (GAP_SIZE < require) | 4218 require = (len_byte * (coding->produced - coding->consumed) |
| 4034 make_gap (require - GAP_SIZE); | 4219 / coding->consumed); |
| 4035 } | 4220 first = 0; |
| 4036 if (GAP_SIZE > 0) *GPT_ADDR = 0; /* Put an anchor. */ | 4221 } |
| 4222 if ((src - dst) < (require + 2000)) | |
| 4223 { | |
| 4224 /* See the comment above the previous call of make_gap. */ | |
| 4225 int add = len_byte + inserted_byte; | |
| 4226 | |
| 4227 GAP_SIZE -= add; | |
| 4228 ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | |
| 4229 GPT += inserted_byte; GPT_BYTE += inserted_byte; | |
| 4230 make_gap (require + 2000); | |
| 4231 GAP_SIZE += add; | |
| 4232 ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | |
| 4233 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | |
| 4234 /* Don't forget to update SRC, DST. */ | |
| 4235 src = GAP_END_ADDR - len_byte; | |
| 4236 dst = GPT_ADDR + inserted_byte; | |
| 4237 } | |
| 4238 } | |
| 4239 if (src - dst > 0) *dst = 0; /* Put an anchor. */ | |
| 4240 | |
| 4241 if (multibyte && (fake_multibyte || !encodep && (to - from) != (to_byte - from_byte))) | |
| 4242 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); | |
| 4243 | |
| 4244 /* Update various buffer positions for the new text. */ | |
| 4245 GAP_SIZE -= inserted_byte; | |
| 4246 ZV += inserted; Z+= inserted; | |
| 4247 ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte; | |
| 4248 GPT += inserted; GPT_BYTE += inserted_byte; | |
| 4037 | 4249 |
| 4038 if (adjust) | 4250 if (adjust) |
| 4039 { | 4251 { |
| 4040 adjust_after_replace (from, from_byte, to, to_byte, | 4252 adjust_after_replace (from, from_byte, to, to_byte, |
| 4041 inserted, inserted_byte); | 4253 inserted, inserted_byte); |
| 4053 inserted = XFASTINT (val); | 4265 inserted = XFASTINT (val); |
| 4054 } | 4266 } |
| 4055 if (pos >= from + orig_inserted) | 4267 if (pos >= from + orig_inserted) |
| 4056 temp_set_point (current_buffer, pos + (inserted - orig_inserted)); | 4268 temp_set_point (current_buffer, pos + (inserted - orig_inserted)); |
| 4057 } | 4269 } |
| 4058 } | 4270 signal_after_change (from, to - from, inserted); |
| 4059 | 4271 } |
| 4060 return ((from_byte - from_byte_orig) + inserted + (to_byte_orig - to_byte)); | 4272 |
| 4273 { | |
| 4274 int skip = (to_byte_orig - to_byte) + (from_byte - from_byte_orig); | |
| 4275 | |
| 4276 coding->consumed = to_byte_orig - from_byte_orig; | |
| 4277 coding->consumed_char = skip + (to - from); | |
| 4278 coding->produced = skip + inserted_byte; | |
| 4279 coding->produced_char = skip + inserted; | |
| 4280 } | |
| 4281 return 0; | |
| 4061 } | 4282 } |
| 4062 | 4283 |
| 4063 Lisp_Object | 4284 Lisp_Object |
| 4064 code_convert_string (str, coding, encodep, nocopy) | 4285 code_convert_string (str, coding, encodep, nocopy) |
| 4065 Lisp_Object str; | 4286 Lisp_Object str; |
| 4093 unibyte<->multibyte conversion. */ | 4314 unibyte<->multibyte conversion. */ |
| 4094 current_buffer->enable_multibyte_characters = Qnil; | 4315 current_buffer->enable_multibyte_characters = Qnil; |
| 4095 insert_from_string (str, 0, 0, to_byte, to_byte, 0); | 4316 insert_from_string (str, 0, 0, to_byte, to_byte, 0); |
| 4096 current_buffer->enable_multibyte_characters = Qt; | 4317 current_buffer->enable_multibyte_characters = Qt; |
| 4097 } | 4318 } |
| 4098 code_convert_region (BEGV, ZV, coding, encodep, 1); | 4319 code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1); |
| 4099 if (encodep) | 4320 if (encodep) |
| 4100 /* We must return the buffer contents as unibyte string. */ | 4321 /* We must return the buffer contents as unibyte string. */ |
| 4101 current_buffer->enable_multibyte_characters = Qnil; | 4322 current_buffer->enable_multibyte_characters = Qnil; |
| 4102 str = make_buffer_string (BEGV, ZV, 0); | 4323 str = make_buffer_string (BEGV, ZV, 0); |
| 4103 set_buffer_internal (prev); | 4324 set_buffer_internal (prev); |
| 4375 | 4596 |
| 4376 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 4597 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 4377 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 4598 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
| 4378 | 4599 |
| 4379 coding.mode |= CODING_MODE_LAST_BLOCK; | 4600 coding.mode |= CODING_MODE_LAST_BLOCK; |
| 4380 len = code_convert_region (from, to, &coding, encodep, 1); | 4601 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), |
| 4381 return make_number (len); | 4602 &coding, encodep, 1); |
| 4603 return make_number (coding.produced_char); | |
| 4382 } | 4604 } |
| 4383 | 4605 |
| 4384 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, | 4606 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, |
| 4385 3, 3, "r\nzCoding system: ", | 4607 3, 3, "r\nzCoding system: ", |
| 4386 "Decode the current region by specified coding system.\n\ | 4608 "Decode the current region by specified coding system.\n\ |
