comparison src/coding.c @ 23325:bbd06336cd0c

(check_composing_code): If the current composing sequence doesn't end properly, return -1.
(DECODE_CHARACTER_ASCII): Update coding->composed_chars.
(DECODE_CHARACTER_DIMENSION1): Likewise.
(decode_coding_iso2022): Check validity of a composing sequence.
(code_convert_string): If the length of text to be converted is shrunk to zero, don't perform code conversion.
(shrink_decoding_region): Fix previous change.
author Kenichi Handa <handa@m17n.org>
date Sat, 26 Sep 1998 04:20:48 +0000
parents 86a8b8566369
children 2da87b489590
comparison
equal deleted inserted replaced
23324:4c5f12c6041c 23325:bbd06336cd0c
211 point to an appropriate area and the variable `coding' to point to 211 point to an appropriate area and the variable `coding' to point to
212 the coding-system of the currently decoding text in advance. */ 212 the coding-system of the currently decoding text in advance. */
213 213
214 /* Decode one ASCII character C. */ 214 /* Decode one ASCII character C. */
215 215
216 #define DECODE_CHARACTER_ASCII(c) \ 216 #define DECODE_CHARACTER_ASCII(c) \
217 do { \ 217 do { \
218 if (COMPOSING_P (coding->composing)) \ 218 if (COMPOSING_P (coding->composing)) \
219 *dst++ = 0xA0, *dst++ = (c) | 0x80; \ 219 { \
220 else \ 220 *dst++ = 0xA0, *dst++ = (c) | 0x80; \
221 { \ 221 coding->composed_chars++; \
222 *dst++ = (c); \ 222 } \
223 coding->produced_char++; \ 223 else \
224 } \ 224 { \
225 *dst++ = (c); \
226 coding->produced_char++; \
227 } \
225 } while (0) 228 } while (0)
226 229
227 /* Decode one DIMENSION1 character whose charset is CHARSET and whose 230 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
228 position-code is C. */ 231 position-code is C. */
229 232
230 #define DECODE_CHARACTER_DIMENSION1(charset, c) \ 233 #define DECODE_CHARACTER_DIMENSION1(charset, c) \
231 do { \ 234 do { \
232 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ 235 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
233 if (COMPOSING_P (coding->composing)) \ 236 if (COMPOSING_P (coding->composing)) \
234 *dst++ = leading_code + 0x20; \ 237 { \
238 *dst++ = leading_code + 0x20; \
239 coding->composed_chars++; \
240 } \
235 else \ 241 else \
236 { \ 242 { \
237 *dst++ = leading_code; \ 243 *dst++ = leading_code; \
238 coding->produced_char++; \ 244 coding->produced_char++; \
239 } \ 245 } \
995 } 1001 }
996 else 1002 else
997 invalid_code_found = 1; 1003 invalid_code_found = 1;
998 } 1004 }
999 } 1005 }
1000 return (invalid_code_found 1006 return (invalid_code_found ? src - src_start : -1);
1001 ? src - src_start
1002 : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
1003 } 1007 }
1004 1008
1005 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 1009 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1006 1010
1007 int 1011 int
1028 1032
1029 if (!NILP (Venable_character_translation) && NILP (translation_table)) 1033 if (!NILP (Venable_character_translation) && NILP (translation_table))
1030 translation_table = Vstandard_translation_table_for_decode; 1034 translation_table = Vstandard_translation_table_for_decode;
1031 1035
1032 coding->produced_char = 0; 1036 coding->produced_char = 0;
1037 coding->composed_chars = 0;
1033 coding->fake_multibyte = 0; 1038 coding->fake_multibyte = 0;
1034 while (src < src_end && (dst_bytes 1039 while (src < src_end && (dst_bytes
1035 ? (dst < adjusted_dst_end) 1040 ? (dst < adjusted_dst_end)
1036 : (dst < src - 6))) 1041 : (dst < src - 6)))
1037 { 1042 {
1241 if (result1 == 0) 1246 if (result1 == 0)
1242 { 1247 {
1243 coding->composing = (c1 == '0' 1248 coding->composing = (c1 == '0'
1244 ? COMPOSING_NO_RULE_HEAD 1249 ? COMPOSING_NO_RULE_HEAD
1245 : COMPOSING_WITH_RULE_HEAD); 1250 : COMPOSING_WITH_RULE_HEAD);
1246 coding->produced_char++; 1251 coding->composed_chars = 0;
1247 } 1252 }
1248 else if (result1 > 0) 1253 else if (result1 > 0)
1249 { 1254 {
1250 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) 1255 if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1251 { 1256 {
1252 bcopy (src_base, dst, result1 + 2); 1257 bcopy (src_base, dst, result1 + 2);
1253 src += result1; 1258 src += result1;
1254 dst += result1 + 2; 1259 dst += result1 + 2;
1255 coding->produced_char += result1 + 2; 1260 coding->produced_char += result1 + 2;
1261 coding->fake_multibyte = 1;
1256 } 1262 }
1257 else 1263 else
1258 { 1264 {
1259 result = CODING_FINISH_INSUFFICIENT_DST; 1265 result = CODING_FINISH_INSUFFICIENT_DST;
1260 goto label_end_of_loop_2; 1266 goto label_end_of_loop_2;
1264 goto label_end_of_loop; 1270 goto label_end_of_loop;
1265 } 1271 }
1266 break; 1272 break;
1267 1273
1268 case '1': /* end composing */ 1274 case '1': /* end composing */
1275 if (coding->composed_chars > 0)
1276 {
1277 if (coding->composed_chars == 1)
1278 {
1279 unsigned char *this_char_start = dst;
1280 int this_bytes;
1281
1282 /* Only one character is in the composing
1283 sequence. Make it a normal character. */
1284 while (*--this_char_start != LEADING_CODE_COMPOSITION);
1285 dst = (this_char_start
1286 + (coding->composing == COMPOSING_NO_RULE_TAIL
1287 ? 1 : 2));
1288 *dst -= 0x20;
1289 if (*dst == 0x80)
1290 *++dst &= 0x7F;
1291 this_bytes = BYTES_BY_CHAR_HEAD (*dst);
1292 while (this_bytes--) *this_char_start++ = *dst++;
1293 dst = this_char_start;
1294 }
1295 coding->produced_char++;
1296 }
1269 coding->composing = COMPOSING_NO; 1297 coding->composing = COMPOSING_NO;
1270 break; 1298 break;
1271 1299
1272 case '[': /* specification of direction */ 1300 case '[': /* specification of direction */
1273 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) 1301 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
3936 endp++; 3964 endp++;
3937 break; 3965 break;
3938 3966
3939 case CODING_CATEGORY_IDX_ISO_7: 3967 case CODING_CATEGORY_IDX_ISO_7:
3940 case CODING_CATEGORY_IDX_ISO_7_TIGHT: 3968 case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3941 /* We can skip all charactes at the tail except for ESC and 3969 {
3942 the following 2-byte at the tail. */ 3970 /* We can skip all charactes at the tail except for 8-bit
3943 if (eol_conversion) 3971 codes and ESC and the following 2-byte at the tail. */
3944 while (begp < endp 3972 unsigned char *eight_bit = NULL;
3945 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') 3973
3946 endp--; 3974 if (eol_conversion)
3947 else 3975 while (begp < endp
3948 while (begp < endp 3976 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
3949 && (c = endp[-1]) != ISO_CODE_ESC) 3977 {
3950 endp--; 3978 if (!eight_bit && c & 0x80) eight_bit = endp;
3951 /* Do not consider LF as ascii if preceded by CR, since that 3979 endp--;
3952 confuses eol decoding. */ 3980 }
3953 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n') 3981 else
3954 endp++; 3982 while (begp < endp
3955 if (begp < endp && endp[-1] == ISO_CODE_ESC) 3983 && (c = endp[-1]) != ISO_CODE_ESC)
3956 { 3984 {
3957 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') 3985 if (!eight_bit && c & 0x80) eight_bit = endp;
3958 /* This is an ASCII designation sequence. We can 3986 endp--;
3959 surely skip the tail. */ 3987 }
3960 endp += 2; 3988 /* Do not consider LF as ascii if preceded by CR, since that
3961 else 3989 confuses eol decoding. */
3962 /* Hmmm, we can't skip the tail. */ 3990 if (begp < endp && endp < endp_orig
3963 endp = endp_orig; 3991 && endp[-1] == '\r' && endp[0] == '\n')
3964 } 3992 endp++;
3993 if (begp < endp && endp[-1] == ISO_CODE_ESC)
3994 {
3995 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3996 /* This is an ASCII designation sequence. We can
3997 surely skip the tail. But, if we have
3998 encountered an 8-bit code, skip only the codes
3999 after that. */
4000 endp = eight_bit ? eight_bit : endp + 2;
4001 else
4002 /* Hmmm, we can't skip the tail. */
4003 endp = endp_orig;
4004 }
4005 else if (eight_bit)
4006 endp = eight_bit;
4007 }
3965 } 4008 }
3966 } 4009 }
3967 *beg += begp - begp_orig; 4010 *beg += begp - begp_orig;
3968 *end += endp - endp_orig; 4011 *end += endp - endp_orig;
3969 return; 4012 return;
4522 if (encodep) 4565 if (encodep)
4523 shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data); 4566 shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4524 else 4567 else
4525 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); 4568 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4526 } 4569 }
4527 if (from == to_byte 4570 if (from == to_byte)
4528 && ! (coding->mode & CODING_MODE_LAST_BLOCK
4529 && CODING_REQUIRE_FLUSHING (coding)))
4530 return (nocopy ? str : Fcopy_sequence (str)); 4571 return (nocopy ? str : Fcopy_sequence (str));
4531 4572
4532 if (encodep) 4573 if (encodep)
4533 len = encoding_buffer_size (coding, to_byte - from); 4574 len = encoding_buffer_size (coding, to_byte - from);
4534 else 4575 else