Mercurial > emacs
comparison src/coding.c @ 23325:bbd06336cd0c
(check_composing_code): If the current composing
sequence doesn't end properly, return -1.
(DECODE_CHARACTER_ASCII): Update coding->composed_chars.
(DECODE_CHARACTER_DIMENSION1): Likewise.
(decode_coding_iso2022): Check validity of a composing sequence.
(code_convert_string): If the length of text to be converted is
shrunk to zero, don't perform code conversion.
(shrink_decoding_region): Fix previous change.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Sat, 26 Sep 1998 04:20:48 +0000 |
| parents | 86a8b8566369 |
| children | 2da87b489590 |
comparison
equal
deleted
inserted
replaced
| 23324:4c5f12c6041c | 23325:bbd06336cd0c |
|---|---|
| 211 point to an appropriate area and the variable `coding' to point to | 211 point to an appropriate area and the variable `coding' to point to |
| 212 the coding-system of the currently decoding text in advance. */ | 212 the coding-system of the currently decoding text in advance. */ |
| 213 | 213 |
| 214 /* Decode one ASCII character C. */ | 214 /* Decode one ASCII character C. */ |
| 215 | 215 |
| 216 #define DECODE_CHARACTER_ASCII(c) \ | 216 #define DECODE_CHARACTER_ASCII(c) \ |
| 217 do { \ | 217 do { \ |
| 218 if (COMPOSING_P (coding->composing)) \ | 218 if (COMPOSING_P (coding->composing)) \ |
| 219 *dst++ = 0xA0, *dst++ = (c) | 0x80; \ | 219 { \ |
| 220 else \ | 220 *dst++ = 0xA0, *dst++ = (c) | 0x80; \ |
| 221 { \ | 221 coding->composed_chars++; \ |
| 222 *dst++ = (c); \ | 222 } \ |
| 223 coding->produced_char++; \ | 223 else \ |
| 224 } \ | 224 { \ |
| 225 *dst++ = (c); \ | |
| 226 coding->produced_char++; \ | |
| 227 } \ | |
| 225 } while (0) | 228 } while (0) |
| 226 | 229 |
| 227 /* Decode one DIMENSION1 character whose charset is CHARSET and whose | 230 /* Decode one DIMENSION1 character whose charset is CHARSET and whose |
| 228 position-code is C. */ | 231 position-code is C. */ |
| 229 | 232 |
| 230 #define DECODE_CHARACTER_DIMENSION1(charset, c) \ | 233 #define DECODE_CHARACTER_DIMENSION1(charset, c) \ |
| 231 do { \ | 234 do { \ |
| 232 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ | 235 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ |
| 233 if (COMPOSING_P (coding->composing)) \ | 236 if (COMPOSING_P (coding->composing)) \ |
| 234 *dst++ = leading_code + 0x20; \ | 237 { \ |
| 238 *dst++ = leading_code + 0x20; \ | |
| 239 coding->composed_chars++; \ | |
| 240 } \ | |
| 235 else \ | 241 else \ |
| 236 { \ | 242 { \ |
| 237 *dst++ = leading_code; \ | 243 *dst++ = leading_code; \ |
| 238 coding->produced_char++; \ | 244 coding->produced_char++; \ |
| 239 } \ | 245 } \ |
| 995 } | 1001 } |
| 996 else | 1002 else |
| 997 invalid_code_found = 1; | 1003 invalid_code_found = 1; |
| 998 } | 1004 } |
| 999 } | 1005 } |
| 1000 return (invalid_code_found | 1006 return (invalid_code_found ? src - src_start : -1); |
| 1001 ? src - src_start | |
| 1002 : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1)); | |
| 1003 } | 1007 } |
| 1004 | 1008 |
| 1005 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 1009 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 1006 | 1010 |
| 1007 int | 1011 int |
| 1028 | 1032 |
| 1029 if (!NILP (Venable_character_translation) && NILP (translation_table)) | 1033 if (!NILP (Venable_character_translation) && NILP (translation_table)) |
| 1030 translation_table = Vstandard_translation_table_for_decode; | 1034 translation_table = Vstandard_translation_table_for_decode; |
| 1031 | 1035 |
| 1032 coding->produced_char = 0; | 1036 coding->produced_char = 0; |
| 1037 coding->composed_chars = 0; | |
| 1033 coding->fake_multibyte = 0; | 1038 coding->fake_multibyte = 0; |
| 1034 while (src < src_end && (dst_bytes | 1039 while (src < src_end && (dst_bytes |
| 1035 ? (dst < adjusted_dst_end) | 1040 ? (dst < adjusted_dst_end) |
| 1036 : (dst < src - 6))) | 1041 : (dst < src - 6))) |
| 1037 { | 1042 { |
| 1241 if (result1 == 0) | 1246 if (result1 == 0) |
| 1242 { | 1247 { |
| 1243 coding->composing = (c1 == '0' | 1248 coding->composing = (c1 == '0' |
| 1244 ? COMPOSING_NO_RULE_HEAD | 1249 ? COMPOSING_NO_RULE_HEAD |
| 1245 : COMPOSING_WITH_RULE_HEAD); | 1250 : COMPOSING_WITH_RULE_HEAD); |
| 1246 coding->produced_char++; | 1251 coding->composed_chars = 0; |
| 1247 } | 1252 } |
| 1248 else if (result1 > 0) | 1253 else if (result1 > 0) |
| 1249 { | 1254 { |
| 1250 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) | 1255 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) |
| 1251 { | 1256 { |
| 1252 bcopy (src_base, dst, result1 + 2); | 1257 bcopy (src_base, dst, result1 + 2); |
| 1253 src += result1; | 1258 src += result1; |
| 1254 dst += result1 + 2; | 1259 dst += result1 + 2; |
| 1255 coding->produced_char += result1 + 2; | 1260 coding->produced_char += result1 + 2; |
| 1261 coding->fake_multibyte = 1; | |
| 1256 } | 1262 } |
| 1257 else | 1263 else |
| 1258 { | 1264 { |
| 1259 result = CODING_FINISH_INSUFFICIENT_DST; | 1265 result = CODING_FINISH_INSUFFICIENT_DST; |
| 1260 goto label_end_of_loop_2; | 1266 goto label_end_of_loop_2; |
| 1264 goto label_end_of_loop; | 1270 goto label_end_of_loop; |
| 1265 } | 1271 } |
| 1266 break; | 1272 break; |
| 1267 | 1273 |
| 1268 case '1': /* end composing */ | 1274 case '1': /* end composing */ |
| 1275 if (coding->composed_chars > 0) | |
| 1276 { | |
| 1277 if (coding->composed_chars == 1) | |
| 1278 { | |
| 1279 unsigned char *this_char_start = dst; | |
| 1280 int this_bytes; | |
| 1281 | |
| 1282 /* Only one character is in the composing | |
| 1283 sequence. Make it a normal character. */ | |
| 1284 while (*--this_char_start != LEADING_CODE_COMPOSITION); | |
| 1285 dst = (this_char_start | |
| 1286 + (coding->composing == COMPOSING_NO_RULE_TAIL | |
| 1287 ? 1 : 2)); | |
| 1288 *dst -= 0x20; | |
| 1289 if (*dst == 0x80) | |
| 1290 *++dst &= 0x7F; | |
| 1291 this_bytes = BYTES_BY_CHAR_HEAD (*dst); | |
| 1292 while (this_bytes--) *this_char_start++ = *dst++; | |
| 1293 dst = this_char_start; | |
| 1294 } | |
| 1295 coding->produced_char++; | |
| 1296 } | |
| 1269 coding->composing = COMPOSING_NO; | 1297 coding->composing = COMPOSING_NO; |
| 1270 break; | 1298 break; |
| 1271 | 1299 |
| 1272 case '[': /* specification of direction */ | 1300 case '[': /* specification of direction */ |
| 1273 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) | 1301 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) |
| 3936 endp++; | 3964 endp++; |
| 3937 break; | 3965 break; |
| 3938 | 3966 |
| 3939 case CODING_CATEGORY_IDX_ISO_7: | 3967 case CODING_CATEGORY_IDX_ISO_7: |
| 3940 case CODING_CATEGORY_IDX_ISO_7_TIGHT: | 3968 case CODING_CATEGORY_IDX_ISO_7_TIGHT: |
| 3941 /* We can skip all characters at the tail except for ESC and | 3969 { |
| 3942 the following 2-byte at the tail. */ | 3970 /* We can skip all characters at the tail except for 8-bit |
| 3943 if (eol_conversion) | 3971 codes and ESC and the following 2-byte at the tail. */ |
| 3944 while (begp < endp | 3972 unsigned char *eight_bit = NULL; |
| 3945 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') | 3973 |
| 3946 endp--; | 3974 if (eol_conversion) |
| 3947 else | 3975 while (begp < endp |
| 3948 while (begp < endp | 3976 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') |
| 3949 && (c = endp[-1]) != ISO_CODE_ESC) | 3977 { |
| 3950 endp--; | 3978 if (!eight_bit && c & 0x80) eight_bit = endp; |
| 3951 /* Do not consider LF as ascii if preceded by CR, since that | 3979 endp--; |
| 3952 confuses eol decoding. */ | 3980 } |
| 3953 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n') | 3981 else |
| 3954 endp++; | 3982 while (begp < endp |
| 3955 if (begp < endp && endp[-1] == ISO_CODE_ESC) | 3983 && (c = endp[-1]) != ISO_CODE_ESC) |
| 3956 { | 3984 { |
| 3957 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') | 3985 if (!eight_bit && c & 0x80) eight_bit = endp; |
| 3958 /* This is an ASCII designation sequence. We can | 3986 endp--; |
| 3959 surely skip the tail. */ | 3987 } |
| 3960 endp += 2; | 3988 /* Do not consider LF as ascii if preceded by CR, since that |
| 3961 else | 3989 confuses eol decoding. */ |
| 3962 /* Hmmm, we can't skip the tail. */ | 3990 if (begp < endp && endp < endp_orig |
| 3963 endp = endp_orig; | 3991 && endp[-1] == '\r' && endp[0] == '\n') |
| 3964 } | 3992 endp++; |
| 3993 if (begp < endp && endp[-1] == ISO_CODE_ESC) | |
| 3994 { | |
| 3995 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') | |
| 3996 /* This is an ASCII designation sequence. We can | |
| 3997 surely skip the tail. But, if we have | |
| 3998 encountered an 8-bit code, skip only the codes | |
| 3999 after that. */ | |
| 4000 endp = eight_bit ? eight_bit : endp + 2; | |
| 4001 else | |
| 4002 /* Hmmm, we can't skip the tail. */ | |
| 4003 endp = endp_orig; | |
| 4004 } | |
| 4005 else if (eight_bit) | |
| 4006 endp = eight_bit; | |
| 4007 } | |
| 3965 } | 4008 } |
| 3966 } | 4009 } |
| 3967 *beg += begp - begp_orig; | 4010 *beg += begp - begp_orig; |
| 3968 *end += endp - endp_orig; | 4011 *end += endp - endp_orig; |
| 3969 return; | 4012 return; |
| 4522 if (encodep) | 4565 if (encodep) |
| 4523 shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data); | 4566 shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data); |
| 4524 else | 4567 else |
| 4525 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); | 4568 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); |
| 4526 } | 4569 } |
| 4527 if (from == to_byte | 4570 if (from == to_byte) |
| 4528 && ! (coding->mode & CODING_MODE_LAST_BLOCK | |
| 4529 && CODING_REQUIRE_FLUSHING (coding))) | |
| 4530 return (nocopy ? str : Fcopy_sequence (str)); | 4571 return (nocopy ? str : Fcopy_sequence (str)); |
| 4531 | 4572 |
| 4532 if (encodep) | 4573 if (encodep) |
| 4533 len = encoding_buffer_size (coding, to_byte - from); | 4574 len = encoding_buffer_size (coding, to_byte - from); |
| 4534 else | 4575 else |
