comparison src/coding.c @ 89686:9bfefb13fe83

(Qinsufficient_source, Qinconsistent_eol) (Qinvalid_source, Qinterrupted, Qinsufficient_memory): New variables. (Vlast_code_conversion_error): New variable. (syms_of_coding): DEFSYM or DEFVAR_LISP them. (ONE_MORE_BYTE): Record error if any instead of signaling an error. If non-ASCII multibyte char is found, return the negative value of the code. All callers changed to check it. (ONE_MORE_BYTE_NO_CHECK): Likewise. (record_conversion_result): New function. All code setting coding->result is changed to call this function. (detect_coding_utf_8): Don't use the local variable incomplete. (decode_coding_utf_8): Likewise. (emacs_mule_char): Change the second arg to `const'. (detect_coding_emacs_mule): Don't use the local variable incomplete. (detect_coding_sjis): Likewise. (detect_coding_big5): Likewise. (decode_coding): Fix flushing out of unprocessed data. (make_conversion_work_buffer): Fix making of a work buffer. (decode_coding_object): Return coding->dst_object.
author Kenichi Handa <handa@m17n.org>
date Mon, 29 Dec 2003 07:52:49 +0000
parents cf1ff36f92dc
children d8fcefca5cf6
comparison
equal deleted inserted replaced
89685:8970a5ea5efc 89686:9bfefb13fe83
316 extern Lisp_Object Qinsert_file_contents, Qwrite_region; 316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; 317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
318 Lisp_Object Qstart_process, Qopen_network_stream; 318 Lisp_Object Qstart_process, Qopen_network_stream;
319 Lisp_Object Qtarget_idx; 319 Lisp_Object Qtarget_idx;
320 320
321 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
322 Lisp_Object Qinterrupted, Qinsufficient_memory;
323
321 int coding_system_require_warning; 324 int coding_system_require_warning;
322 325
323 Lisp_Object Vselect_safe_coding_system_function; 326 Lisp_Object Vselect_safe_coding_system_function;
324 327
325 /* Mnemonic string for each format of end-of-line. */ 328 /* Mnemonic string for each format of end-of-line. */
345 Lisp_Object Vcoding_system_for_read; 348 Lisp_Object Vcoding_system_for_read;
346 /* Coding-system for writing files and sending data to process. */ 349 /* Coding-system for writing files and sending data to process. */
347 Lisp_Object Vcoding_system_for_write; 350 Lisp_Object Vcoding_system_for_write;
348 /* Coding-system actually used in the latest I/O. */ 351 /* Coding-system actually used in the latest I/O. */
349 Lisp_Object Vlast_coding_system_used; 352 Lisp_Object Vlast_coding_system_used;
350 353 /* Set to non-nil when an error is detected during code conversion. */
354 Lisp_Object Vlast_code_conversion_error;
351 /* A vector of length 256 which contains information about special 355 /* A vector of length 256 which contains information about special
352 Latin codes (especially for dealing with Microsoft codes). */ 356 Latin codes (especially for dealing with Microsoft codes). */
353 Lisp_Object Vlatin_extra_code_table; 357 Lisp_Object Vlatin_extra_code_table;
354 358
355 /* Flag to inhibit code conversion of end-of-line format. */ 359 /* Flag to inhibit code conversion of end-of-line format. */
404 /* Two special coding systems. */ 408 /* Two special coding systems. */
405 Lisp_Object Vsjis_coding_system; 409 Lisp_Object Vsjis_coding_system;
406 Lisp_Object Vbig5_coding_system; 410 Lisp_Object Vbig5_coding_system;
407 411
408 412
413 static void record_conversion_result (struct coding_system *coding,
414 enum coding_result_code result);
409 static int detect_coding_utf_8 P_ ((struct coding_system *, 415 static int detect_coding_utf_8 P_ ((struct coding_system *,
410 struct coding_detection_info *info)); 416 struct coding_detection_info *info));
411 static void decode_coding_utf_8 P_ ((struct coding_system *)); 417 static void decode_coding_utf_8 P_ ((struct coding_system *));
412 static int encode_coding_utf_8 P_ ((struct coding_system *)); 418 static int encode_coding_utf_8 P_ ((struct coding_system *));
413 419
716 } while (0) 722 } while (0)
717 723
718 724
719 /* Safely get one byte from the source text pointed by SRC which ends 725 /* Safely get one byte from the source text pointed by SRC which ends
720 at SRC_END, and set C to that byte. If there are not enough bytes 726 at SRC_END, and set C to that byte. If there are not enough bytes
721 in the source, it jumps to `no_more_source'. The caller 727 in the source, it jumps to `no_more_source'. If multibytep is
722 should declare and set these variables appropriately in advance: 728 nonzero, and a multibyte character is found at SRC, set C to the
723 src, src_end, multibytep 729 negative value of the character code. The caller should declare
724 */ 730 and set these variables appropriately in advance:
725 731 src, src_end, multibytep */
726 #define ONE_MORE_BYTE(c) \ 732
727 do { \ 733 #define ONE_MORE_BYTE(c) \
728 if (src == src_end) \ 734 do { \
729 { \ 735 if (src == src_end) \
730 if (src_base < src) \ 736 { \
731 coding->result = CODING_RESULT_INSUFFICIENT_SRC; \ 737 if (src_base < src) \
732 goto no_more_source; \ 738 record_conversion_result \
733 } \ 739 (coding, CODING_RESULT_INSUFFICIENT_SRC); \
734 c = *src++; \ 740 goto no_more_source; \
735 if (multibytep && (c & 0x80)) \ 741 } \
736 { \ 742 c = *src++; \
737 if ((c & 0xFE) != 0xC0) \ 743 if (multibytep && (c & 0x80)) \
738 error ("Undecodable char found"); \ 744 { \
739 c = ((c & 1) << 6) | *src++; \ 745 if ((c & 0xFE) == 0xC0) \
740 } \ 746 c = ((c & 1) << 6) | *src++; \
741 consumed_chars++; \ 747 else \
748 { \
749 c = - string_char (--src, &src, NULL); \
750 record_conversion_result \
751 (coding, CODING_RESULT_INVALID_SRC); \
752 } \
753 } \
754 consumed_chars++; \
742 } while (0) 755 } while (0)
743 756
744 757
745 #define ONE_MORE_BYTE_NO_CHECK(c) \ 758 #define ONE_MORE_BYTE_NO_CHECK(c) \
746 do { \ 759 do { \
747 c = *src++; \ 760 c = *src++; \
748 if (multibytep && (c & 0x80)) \ 761 if (multibytep && (c & 0x80)) \
749 { \ 762 { \
750 if ((c & 0xFE) != 0xC0) \ 763 if ((c & 0xFE) == 0xC0) \
751 error ("Undecodable char found"); \ 764 c = ((c & 1) << 6) | *src++; \
752 c = ((c & 1) << 6) | *src++; \ 765 else \
753 } \ 766 { \
754 consumed_chars++; \ 767 c = - string_char (--src, &src, NULL); \
768 record_conversion_result \
769 (coding, CODING_RESULT_INVALID_SRC); \
770 } \
771 } \
772 consumed_chars++; \
755 } while (0) 773 } while (0)
756 774
757 775
758 /* Store a byte C in the place pointed by DST and increment DST to the 776 /* Store a byte C in the place pointed by DST and increment DST to the
759 next free point, and increment PRODUCED_CHARS. The caller should 777 next free point, and increment PRODUCED_CHARS. The caller should
837 EMIT_TWO_BYTES (c1, c2); \ 855 EMIT_TWO_BYTES (c1, c2); \
838 EMIT_TWO_BYTES (c3, c4); \ 856 EMIT_TWO_BYTES (c3, c4); \
839 } while (0) 857 } while (0)
840 858
841 859
860 static void
861 record_conversion_result (struct coding_system *coding,
862 enum coding_result_code result)
863 {
864 coding->result = result;
865 switch (result)
866 {
867 case CODING_RESULT_INSUFFICIENT_SRC:
868 Vlast_code_conversion_error = Qinsufficient_source;
869 break;
870 case CODING_RESULT_INCONSISTENT_EOL:
871 Vlast_code_conversion_error = Qinconsistent_eol;
872 break;
873 case CODING_RESULT_INVALID_SRC:
874 Vlast_code_conversion_error = Qinvalid_source;
875 break;
876 case CODING_RESULT_INTERRUPT:
877 Vlast_code_conversion_error = Qinterrupted;
878 break;
879 case CODING_RESULT_INSUFFICIENT_MEM:
880 Vlast_code_conversion_error = Qinsufficient_memory;
881 break;
882 }
883 }
884
842 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ 885 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
843 do { \ 886 do { \
844 charset_map_loaded = 0; \ 887 charset_map_loaded = 0; \
845 c = DECODE_CHAR (charset, code); \ 888 c = DECODE_CHAR (charset, code); \
846 if (charset_map_loaded) \ 889 if (charset_map_loaded) \
969 1012
970 if (BUFFERP (coding->dst_object)) 1013 if (BUFFERP (coding->dst_object))
971 coding_alloc_by_making_gap (coding, nbytes); 1014 coding_alloc_by_making_gap (coding, nbytes);
972 else 1015 else
973 coding_alloc_by_realloc (coding, nbytes); 1016 coding_alloc_by_realloc (coding, nbytes);
974 coding->result = CODING_RESULT_SUCCESS; 1017 record_conversion_result (coding, CODING_RESULT_SUCCESS);
975 coding_set_destination (coding); 1018 coding_set_destination (coding);
976 dst = coding->destination + offset; 1019 dst = coding->destination + offset;
977 return dst; 1020 return dst;
978 } 1021 }
979 1022
1047 static int 1090 static int
1048 detect_coding_utf_8 (coding, detect_info) 1091 detect_coding_utf_8 (coding, detect_info)
1049 struct coding_system *coding; 1092 struct coding_system *coding;
1050 struct coding_detection_info *detect_info; 1093 struct coding_detection_info *detect_info;
1051 { 1094 {
1052 const unsigned char *src = coding->source, *src_base = src; 1095 const unsigned char *src = coding->source, *src_base;
1053 const unsigned char *src_end = coding->source + coding->src_bytes; 1096 const unsigned char *src_end = coding->source + coding->src_bytes;
1054 int multibytep = coding->src_multibyte; 1097 int multibytep = coding->src_multibyte;
1055 int consumed_chars = 0; 1098 int consumed_chars = 0;
1056 int found = 0; 1099 int found = 0;
1057 int incomplete;
1058 1100
1059 detect_info->checked |= CATEGORY_MASK_UTF_8; 1101 detect_info->checked |= CATEGORY_MASK_UTF_8;
1060 /* A coding system of this category is always ASCII compatible. */ 1102 /* A coding system of this category is always ASCII compatible. */
1061 src += coding->head_ascii; 1103 src += coding->head_ascii;
1062 1104
1063 while (1) 1105 while (1)
1064 { 1106 {
1065 int c, c1, c2, c3, c4; 1107 int c, c1, c2, c3, c4;
1066 1108
1067 incomplete = 0; 1109 src_base = src;
1068 ONE_MORE_BYTE (c); 1110 ONE_MORE_BYTE (c);
1069 if (UTF_8_1_OCTET_P (c)) 1111 if (c < 0 || UTF_8_1_OCTET_P (c))
1070 continue; 1112 continue;
1071 incomplete = 1;
1072 ONE_MORE_BYTE (c1); 1113 ONE_MORE_BYTE (c1);
1073 if (! UTF_8_EXTRA_OCTET_P (c1)) 1114 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1074 break; 1115 break;
1075 if (UTF_8_2_OCTET_LEADING_P (c)) 1116 if (UTF_8_2_OCTET_LEADING_P (c))
1076 { 1117 {
1077 found = CATEGORY_MASK_UTF_8; 1118 found = CATEGORY_MASK_UTF_8;
1078 continue; 1119 continue;
1079 } 1120 }
1080 ONE_MORE_BYTE (c2); 1121 ONE_MORE_BYTE (c2);
1081 if (! UTF_8_EXTRA_OCTET_P (c2)) 1122 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1082 break; 1123 break;
1083 if (UTF_8_3_OCTET_LEADING_P (c)) 1124 if (UTF_8_3_OCTET_LEADING_P (c))
1084 { 1125 {
1085 found = CATEGORY_MASK_UTF_8; 1126 found = CATEGORY_MASK_UTF_8;
1086 continue; 1127 continue;
1087 } 1128 }
1088 ONE_MORE_BYTE (c3); 1129 ONE_MORE_BYTE (c3);
1089 if (! UTF_8_EXTRA_OCTET_P (c3)) 1130 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1090 break; 1131 break;
1091 if (UTF_8_4_OCTET_LEADING_P (c)) 1132 if (UTF_8_4_OCTET_LEADING_P (c))
1092 { 1133 {
1093 found = CATEGORY_MASK_UTF_8; 1134 found = CATEGORY_MASK_UTF_8;
1094 continue; 1135 continue;
1095 } 1136 }
1096 ONE_MORE_BYTE (c4); 1137 ONE_MORE_BYTE (c4);
1097 if (! UTF_8_EXTRA_OCTET_P (c4)) 1138 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1098 break; 1139 break;
1099 if (UTF_8_5_OCTET_LEADING_P (c)) 1140 if (UTF_8_5_OCTET_LEADING_P (c))
1100 { 1141 {
1101 found = CATEGORY_MASK_UTF_8; 1142 found = CATEGORY_MASK_UTF_8;
1102 continue; 1143 continue;
1105 } 1146 }
1106 detect_info->rejected |= CATEGORY_MASK_UTF_8; 1147 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1107 return 0; 1148 return 0;
1108 1149
1109 no_more_source: 1150 no_more_source:
1110 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) 1151 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1111 { 1152 {
1112 detect_info->rejected |= CATEGORY_MASK_UTF_8; 1153 detect_info->rejected |= CATEGORY_MASK_UTF_8;
1113 return 0; 1154 return 0;
1114 } 1155 }
1115 detect_info->found |= found; 1156 detect_info->found |= found;
1141 1182
1142 if (charbuf >= charbuf_end) 1183 if (charbuf >= charbuf_end)
1143 break; 1184 break;
1144 1185
1145 ONE_MORE_BYTE (c1); 1186 ONE_MORE_BYTE (c1);
1146 if (UTF_8_1_OCTET_P(c1)) 1187 if (c1 < 0)
1188 {
1189 c = - c1;
1190 }
1191 else if (UTF_8_1_OCTET_P(c1))
1147 { 1192 {
1148 c = c1; 1193 c = c1;
1149 } 1194 }
1150 else 1195 else
1151 { 1196 {
1152 ONE_MORE_BYTE (c2); 1197 ONE_MORE_BYTE (c2);
1153 if (! UTF_8_EXTRA_OCTET_P (c2)) 1198 if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1154 goto invalid_code; 1199 goto invalid_code;
1155 if (UTF_8_2_OCTET_LEADING_P (c1)) 1200 if (UTF_8_2_OCTET_LEADING_P (c1))
1156 { 1201 {
1157 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); 1202 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1158 /* Reject overlong sequences here and below. Encoders 1203 /* Reject overlong sequences here and below. Encoders
1162 goto invalid_code; 1207 goto invalid_code;
1163 } 1208 }
1164 else 1209 else
1165 { 1210 {
1166 ONE_MORE_BYTE (c3); 1211 ONE_MORE_BYTE (c3);
1167 if (! UTF_8_EXTRA_OCTET_P (c3)) 1212 if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1168 goto invalid_code; 1213 goto invalid_code;
1169 if (UTF_8_3_OCTET_LEADING_P (c1)) 1214 if (UTF_8_3_OCTET_LEADING_P (c1))
1170 { 1215 {
1171 c = (((c1 & 0xF) << 12) 1216 c = (((c1 & 0xF) << 12)
1172 | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); 1217 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1175 goto invalid_code; 1220 goto invalid_code;
1176 } 1221 }
1177 else 1222 else
1178 { 1223 {
1179 ONE_MORE_BYTE (c4); 1224 ONE_MORE_BYTE (c4);
1180 if (! UTF_8_EXTRA_OCTET_P (c4)) 1225 if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1181 goto invalid_code; 1226 goto invalid_code;
1182 if (UTF_8_4_OCTET_LEADING_P (c1)) 1227 if (UTF_8_4_OCTET_LEADING_P (c1))
1183 { 1228 {
1184 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) 1229 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1185 | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); 1230 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1187 goto invalid_code; 1232 goto invalid_code;
1188 } 1233 }
1189 else 1234 else
1190 { 1235 {
1191 ONE_MORE_BYTE (c5); 1236 ONE_MORE_BYTE (c5);
1192 if (! UTF_8_EXTRA_OCTET_P (c5)) 1237 if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1193 goto invalid_code; 1238 goto invalid_code;
1194 if (UTF_8_5_OCTET_LEADING_P (c1)) 1239 if (UTF_8_5_OCTET_LEADING_P (c1))
1195 { 1240 {
1196 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) 1241 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1197 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) 1242 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1269 c = *charbuf++; 1314 c = *charbuf++;
1270 dst += CHAR_STRING (c, dst); 1315 dst += CHAR_STRING (c, dst);
1271 produced_chars++; 1316 produced_chars++;
1272 } 1317 }
1273 } 1318 }
1274 coding->result = CODING_RESULT_SUCCESS; 1319 record_conversion_result (coding, CODING_RESULT_SUCCESS);
1275 coding->produced_char += produced_chars; 1320 coding->produced_char += produced_chars;
1276 coding->produced = dst - coding->destination; 1321 coding->produced = dst - coding->destination;
1277 return 0; 1322 return 0;
1278 } 1323 }
1279 1324
1329 | CATEGORY_MASK_UTF_16_AUTO); 1374 | CATEGORY_MASK_UTF_16_AUTO);
1330 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE 1375 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1331 | CATEGORY_MASK_UTF_16_BE_NOSIG 1376 | CATEGORY_MASK_UTF_16_BE_NOSIG
1332 | CATEGORY_MASK_UTF_16_LE_NOSIG); 1377 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1333 } 1378 }
1334 else 1379 else if (c1 >= 0 && c2 >= 0)
1335 { 1380 {
1336 unsigned char b1[256], b2[256]; 1381 unsigned char b1[256], b2[256];
1337 int b1_variants = 1, b2_variants = 1; 1382 int b1_variants = 1, b2_variants = 1;
1338 int n; 1383 int n;
1339 1384
1340 bzero (b1, 256), bzero (b2, 256); 1385 bzero (b1, 256), bzero (b2, 256);
1341 b1[c1]++, b2[c2]++; 1386 b1[c1]++, b2[c2]++;
1342 for (n = 0; n < 256 && src < src_end; n++) 1387 for (n = 0; n < 256 && src < src_end; n++)
1343 { 1388 {
1389 src_base = src;
1344 ONE_MORE_BYTE (c1); 1390 ONE_MORE_BYTE (c1);
1345 ONE_MORE_BYTE (c2); 1391 ONE_MORE_BYTE (c2);
1392 if (c1 < 0 || c2 < 0)
1393 break;
1346 if (! b1[c1++]) b1_variants++; 1394 if (! b1[c1++]) b1_variants++;
1347 if (! b2[c2++]) b2_variants++; 1395 if (! b2[c2++]) b2_variants++;
1348 } 1396 }
1349 if (b1_variants < b2_variants) 1397 if (b1_variants < b2_variants)
1350 detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG; 1398 detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1410 1458
1411 if (charbuf + 2 >= charbuf_end) 1459 if (charbuf + 2 >= charbuf_end)
1412 break; 1460 break;
1413 1461
1414 ONE_MORE_BYTE (c1); 1462 ONE_MORE_BYTE (c1);
1463 if (c1 < 0)
1464 {
1465 *charbuf++ = -c1;
1466 continue;
1467 }
1415 ONE_MORE_BYTE (c2); 1468 ONE_MORE_BYTE (c2);
1469 if (c2 < 0)
1470 {
1471 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1472 *charbuf++ = -c2;
1473 continue;
1474 }
1416 c = (endian == utf_16_big_endian 1475 c = (endian == utf_16_big_endian
1417 ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); 1476 ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1418 if (surrogate) 1477 if (surrogate)
1419 { 1478 {
1420 if (! UTF_16_LOW_SURROGATE_P (c)) 1479 if (! UTF_16_LOW_SURROGATE_P (c))
1506 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF); 1565 EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1507 else 1566 else
1508 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8); 1567 EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1509 } 1568 }
1510 } 1569 }
1511 coding->result = CODING_RESULT_SUCCESS; 1570 record_conversion_result (coding, CODING_RESULT_SUCCESS);
1512 coding->produced = dst - coding->destination; 1571 coding->produced = dst - coding->destination;
1513 coding->produced_char += produced_chars; 1572 coding->produced_char += produced_chars;
1514 return 0; 1573 return 0;
1515 } 1574 }
1516 1575
1591 char emacs_mule_bytes[256]; 1650 char emacs_mule_bytes[256];
1592 1651
1593 int 1652 int
1594 emacs_mule_char (coding, src, nbytes, nchars, id) 1653 emacs_mule_char (coding, src, nbytes, nchars, id)
1595 struct coding_system *coding; 1654 struct coding_system *coding;
1596 unsigned char *src; 1655 const unsigned char *src;
1597 int *nbytes, *nchars, *id; 1656 int *nbytes, *nchars, *id;
1598 { 1657 {
1599 const unsigned char *src_end = coding->source + coding->src_bytes; 1658 const unsigned char *src_end = coding->source + coding->src_bytes;
1600 const unsigned char *src_base = src; 1659 const unsigned char *src_base = src;
1601 int multibytep = coding->src_multibyte; 1660 int multibytep = coding->src_multibyte;
1603 unsigned code; 1662 unsigned code;
1604 int c; 1663 int c;
1605 int consumed_chars = 0; 1664 int consumed_chars = 0;
1606 1665
1607 ONE_MORE_BYTE (c); 1666 ONE_MORE_BYTE (c);
1608 switch (emacs_mule_bytes[c]) 1667 if (c < 0)
1609 { 1668 {
1610 case 2: 1669 c = -c;
1611 if (! (charset = emacs_mule_charset[c])) 1670 charset = emacs_mule_charset[0];
1612 goto invalid_code; 1671 }
1613 ONE_MORE_BYTE (c); 1672 else
1614 code = c & 0x7F; 1673 {
1615 break; 1674 switch (emacs_mule_bytes[c])
1616 1675 {
1617 case 3: 1676 case 2:
1618 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1619 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1620 {
1621 ONE_MORE_BYTE (c);
1622 if (! (charset = emacs_mule_charset[c])) 1677 if (! (charset = emacs_mule_charset[c]))
1623 goto invalid_code; 1678 goto invalid_code;
1624 ONE_MORE_BYTE (c); 1679 ONE_MORE_BYTE (c);
1680 if (c < 0)
1681 goto invalid_code;
1625 code = c & 0x7F; 1682 code = c & 0x7F;
1626 } 1683 break;
1627 else 1684
1628 { 1685 case 3:
1629 if (! (charset = emacs_mule_charset[c])) 1686 if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1687 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1688 {
1689 ONE_MORE_BYTE (c);
1690 if (c < 0 || ! (charset = emacs_mule_charset[c]))
1691 goto invalid_code;
1692 ONE_MORE_BYTE (c);
1693 if (c < 0)
1694 goto invalid_code;
1695 code = c & 0x7F;
1696 }
1697 else
1698 {
1699 if (! (charset = emacs_mule_charset[c]))
1700 goto invalid_code;
1701 ONE_MORE_BYTE (c);
1702 if (c < 0)
1703 goto invalid_code;
1704 code = (c & 0x7F) << 8;
1705 ONE_MORE_BYTE (c);
1706 if (c < 0)
1707 goto invalid_code;
1708 code |= c & 0x7F;
1709 }
1710 break;
1711
1712 case 4:
1713 ONE_MORE_BYTE (c);
1714 if (c < 0 || ! (charset = emacs_mule_charset[c]))
1630 goto invalid_code; 1715 goto invalid_code;
1631 ONE_MORE_BYTE (c); 1716 ONE_MORE_BYTE (c);
1717 if (c < 0)
1718 goto invalid_code;
1632 code = (c & 0x7F) << 8; 1719 code = (c & 0x7F) << 8;
1633 ONE_MORE_BYTE (c); 1720 ONE_MORE_BYTE (c);
1721 if (c < 0)
1722 goto invalid_code;
1634 code |= c & 0x7F; 1723 code |= c & 0x7F;
1635 } 1724 break;
1636 break; 1725
1637 1726 case 1:
1638 case 4: 1727 code = c;
1639 ONE_MORE_BYTE (c); 1728 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1640 if (! (charset = emacs_mule_charset[c])) 1729 ? charset_ascii : charset_eight_bit);
1730 break;
1731
1732 default:
1733 abort ();
1734 }
1735 c = DECODE_CHAR (charset, code);
1736 if (c < 0)
1641 goto invalid_code; 1737 goto invalid_code;
1642 ONE_MORE_BYTE (c); 1738 }
1643 code = (c & 0x7F) << 8;
1644 ONE_MORE_BYTE (c);
1645 code |= c & 0x7F;
1646 break;
1647
1648 case 1:
1649 code = c;
1650 charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1651 ? charset_ascii : charset_eight_bit);
1652 break;
1653
1654 default:
1655 abort ();
1656 }
1657 c = DECODE_CHAR (charset, code);
1658 if (c < 0)
1659 goto invalid_code;
1660 *nbytes = src - src_base; 1739 *nbytes = src - src_base;
1661 *nchars = consumed_chars; 1740 *nchars = consumed_chars;
1662 if (id) 1741 if (id)
1663 *id = charset->id; 1742 *id = charset->id;
1664 return c; 1743 return c;
1678 static int 1757 static int
1679 detect_coding_emacs_mule (coding, detect_info) 1758 detect_coding_emacs_mule (coding, detect_info)
1680 struct coding_system *coding; 1759 struct coding_system *coding;
1681 struct coding_detection_info *detect_info; 1760 struct coding_detection_info *detect_info;
1682 { 1761 {
1683 const unsigned char *src = coding->source, *src_base = src; 1762 const unsigned char *src = coding->source, *src_base;
1684 const unsigned char *src_end = coding->source + coding->src_bytes; 1763 const unsigned char *src_end = coding->source + coding->src_bytes;
1685 int multibytep = coding->src_multibyte; 1764 int multibytep = coding->src_multibyte;
1686 int consumed_chars = 0; 1765 int consumed_chars = 0;
1687 int c; 1766 int c;
1688 int found = 0; 1767 int found = 0;
1689 int incomplete;
1690 1768
1691 detect_info->checked |= CATEGORY_MASK_EMACS_MULE; 1769 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1692 /* A coding system of this category is always ASCII compatible. */ 1770 /* A coding system of this category is always ASCII compatible. */
1693 src += coding->head_ascii; 1771 src += coding->head_ascii;
1694 1772
1695 while (1) 1773 while (1)
1696 { 1774 {
1697 incomplete = 0; 1775 src_base = src;
1698 ONE_MORE_BYTE (c); 1776 ONE_MORE_BYTE (c);
1699 incomplete = 1; 1777 if (c < 0)
1700 1778 continue;
1701 if (c == 0x80) 1779 if (c == 0x80)
1702 { 1780 {
1703 /* Perhaps the start of a composite character. We simply skip 1781 /* Perhaps the start of a composite character. We simply skip
1704 it because analyzing it is too heavy for detecting. But, 1782 it because analyzing it is too heavy for detecting. But,
1705 at least, we check that the composite character 1783 at least, we check that the composite character
1743 } 1821 }
1744 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; 1822 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1745 return 0; 1823 return 0;
1746 1824
1747 no_more_source: 1825 no_more_source:
1748 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) 1826 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1749 { 1827 {
1750 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; 1828 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1751 return 0; 1829 return 0;
1752 } 1830 }
1753 detect_info->found |= found; 1831 detect_info->found |= found;
1840 int from, to; \ 1918 int from, to; \
1841 int consumed_chars_limit; \ 1919 int consumed_chars_limit; \
1842 int nbytes, nchars; \ 1920 int nbytes, nchars; \
1843 \ 1921 \
1844 ONE_MORE_BYTE (c); \ 1922 ONE_MORE_BYTE (c); \
1923 if (c < 0) \
1924 goto invalid_code; \
1845 nbytes = c - 0xA0; \ 1925 nbytes = c - 0xA0; \
1846 if (nbytes < 3) \ 1926 if (nbytes < 3) \
1847 goto invalid_code; \ 1927 goto invalid_code; \
1848 ONE_MORE_BYTE (c); \ 1928 ONE_MORE_BYTE (c); \
1929 if (c < 0) \
1930 goto invalid_code; \
1849 nchars = c - 0xA0; \ 1931 nchars = c - 0xA0; \
1850 from = coding->produced + char_offset; \ 1932 from = coding->produced + char_offset; \
1851 to = from + nchars; \ 1933 to = from + nchars; \
1852 ADD_COMPOSITION_DATA (charbuf, from, to, method); \ 1934 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1853 consumed_chars_limit = consumed_chars_base + nbytes; \ 1935 consumed_chars_limit = consumed_chars_base + nbytes; \
1950 2032
1951 if (charbuf >= charbuf_end) 2033 if (charbuf >= charbuf_end)
1952 break; 2034 break;
1953 2035
1954 ONE_MORE_BYTE (c); 2036 ONE_MORE_BYTE (c);
1955 2037 if (c < 0)
1956 if (c < 0x80) 2038 {
2039 *charbuf++ = -c;
2040 char_offset++;
2041 }
2042 else if (c < 0x80)
1957 { 2043 {
1958 *charbuf++ = c; 2044 *charbuf++ = c;
1959 char_offset++; 2045 char_offset++;
1960 } 2046 }
1961 else if (c == 0x80) 2047 else if (c == 0x80)
1962 { 2048 {
1963 ONE_MORE_BYTE (c); 2049 ONE_MORE_BYTE (c);
2050 if (c < 0)
2051 goto invalid_code;
1964 if (c - 0xF2 >= COMPOSITION_RELATIVE 2052 if (c - 0xF2 >= COMPOSITION_RELATIVE
1965 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) 2053 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1966 DECODE_EMACS_MULE_21_COMPOSITION (c); 2054 DECODE_EMACS_MULE_21_COMPOSITION (c);
1967 else if (c < 0xC0) 2055 else if (c < 0xC0)
1968 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); 2056 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2128 EMIT_ONE_BYTE (code >> 8); 2216 EMIT_ONE_BYTE (code >> 8);
2129 EMIT_ONE_BYTE (code & 0xFF); 2217 EMIT_ONE_BYTE (code & 0xFF);
2130 } 2218 }
2131 } 2219 }
2132 } 2220 }
2133 coding->result = CODING_RESULT_SUCCESS; 2221 record_conversion_result (coding, CODING_RESULT_SUCCESS);
2134 coding->produced_char += produced_chars; 2222 coding->produced_char += produced_chars;
2135 coding->produced = dst - coding->destination; 2223 coding->produced = dst - coding->destination;
2136 return 0; 2224 return 0;
2137 } 2225 }
2138 2226
2428 /* A coding system of this category is always ASCII compatible. */ 2516 /* A coding system of this category is always ASCII compatible. */
2429 src += coding->head_ascii; 2517 src += coding->head_ascii;
2430 2518
2431 while (rejected != CATEGORY_MASK_ISO) 2519 while (rejected != CATEGORY_MASK_ISO)
2432 { 2520 {
2521 src_base = src;
2433 ONE_MORE_BYTE (c); 2522 ONE_MORE_BYTE (c);
2434 switch (c) 2523 switch (c)
2435 { 2524 {
2436 case ISO_CODE_ESC: 2525 case ISO_CODE_ESC:
2437 if (inhibit_iso_escape_detection) 2526 if (inhibit_iso_escape_detection)
2541 & CODING_ISO_FLAG_SINGLE_SHIFT) 2630 & CODING_ISO_FLAG_SINGLE_SHIFT)
2542 found |= CATEGORY_MASK_ISO_8_2; 2631 found |= CATEGORY_MASK_ISO_8_2;
2543 goto check_extra_latin; 2632 goto check_extra_latin;
2544 2633
2545 default: 2634 default:
2635 if (c < 0)
2636 continue;
2546 if (c < 0x80) 2637 if (c < 0x80)
2547 { 2638 {
2548 single_shifting = 0; 2639 single_shifting = 0;
2549 break; 2640 break;
2550 } 2641 }
2814 2905
2815 if (charbuf >= charbuf_end) 2906 if (charbuf >= charbuf_end)
2816 break; 2907 break;
2817 2908
2818 ONE_MORE_BYTE (c1); 2909 ONE_MORE_BYTE (c1);
2910 if (c1 < 0)
2911 goto invalid_code;
2819 2912
2820 /* We produce at most one character. */ 2913 /* We produce at most one character. */
2821 switch (iso_code_class [c1]) 2914 switch (iso_code_class [c1])
2822 { 2915 {
2823 case ISO_0x20_or_0x7F: 2916 case ISO_0x20_or_0x7F:
3184 invalid_code: 3277 invalid_code:
3185 MAYBE_FINISH_COMPOSITION (); 3278 MAYBE_FINISH_COMPOSITION ();
3186 src = src_base; 3279 src = src_base;
3187 consumed_chars = consumed_chars_base; 3280 consumed_chars = consumed_chars_base;
3188 ONE_MORE_BYTE (c); 3281 ONE_MORE_BYTE (c);
3189 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); 3282 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3190 char_offset++; 3283 char_offset++;
3191 coding->errors++; 3284 coding->errors++;
3192 continue; 3285 continue;
3193 3286
3194 break_loop: 3287 break_loop:
3743 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL) 3836 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3744 { 3837 {
3745 ASSURE_DESTINATION (safe_room); 3838 ASSURE_DESTINATION (safe_room);
3746 ENCODE_RESET_PLANE_AND_REGISTER (); 3839 ENCODE_RESET_PLANE_AND_REGISTER ();
3747 } 3840 }
3748 coding->result = CODING_RESULT_SUCCESS; 3841 record_conversion_result (coding, CODING_RESULT_SUCCESS);
3749 CODING_ISO_BOL (coding) = bol_designation; 3842 CODING_ISO_BOL (coding) = bol_designation;
3750 coding->produced_char += produced_chars; 3843 coding->produced_char += produced_chars;
3751 coding->produced = dst - coding->destination; 3844 coding->produced = dst - coding->destination;
3752 return 0; 3845 return 0;
3753 } 3846 }
3796 static int 3889 static int
3797 detect_coding_sjis (coding, detect_info) 3890 detect_coding_sjis (coding, detect_info)
3798 struct coding_system *coding; 3891 struct coding_system *coding;
3799 struct coding_detection_info *detect_info; 3892 struct coding_detection_info *detect_info;
3800 { 3893 {
3801 const unsigned char *src = coding->source, *src_base = src; 3894 const unsigned char *src = coding->source, *src_base;
3802 const unsigned char *src_end = coding->source + coding->src_bytes; 3895 const unsigned char *src_end = coding->source + coding->src_bytes;
3803 int multibytep = coding->src_multibyte; 3896 int multibytep = coding->src_multibyte;
3804 int consumed_chars = 0; 3897 int consumed_chars = 0;
3805 int found = 0; 3898 int found = 0;
3806 int c; 3899 int c;
3807 int incomplete;
3808 3900
3809 detect_info->checked |= CATEGORY_MASK_SJIS; 3901 detect_info->checked |= CATEGORY_MASK_SJIS;
3810 /* A coding system of this category is always ASCII compatible. */ 3902 /* A coding system of this category is always ASCII compatible. */
3811 src += coding->head_ascii; 3903 src += coding->head_ascii;
3812 3904
3813 while (1) 3905 while (1)
3814 { 3906 {
3815 incomplete = 0; 3907 src_base = src;
3816 ONE_MORE_BYTE (c); 3908 ONE_MORE_BYTE (c);
3817 incomplete = 1;
3818 if (c < 0x80) 3909 if (c < 0x80)
3819 continue; 3910 continue;
3820 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) 3911 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3821 { 3912 {
3822 ONE_MORE_BYTE (c); 3913 ONE_MORE_BYTE (c);
3831 } 3922 }
3832 detect_info->rejected |= CATEGORY_MASK_SJIS; 3923 detect_info->rejected |= CATEGORY_MASK_SJIS;
3833 return 0; 3924 return 0;
3834 3925
3835 no_more_source: 3926 no_more_source:
3836 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) 3927 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3837 { 3928 {
3838 detect_info->rejected |= CATEGORY_MASK_SJIS; 3929 detect_info->rejected |= CATEGORY_MASK_SJIS;
3839 return 0; 3930 return 0;
3840 } 3931 }
3841 detect_info->found |= found; 3932 detect_info->found |= found;
3849 static int 3940 static int
3850 detect_coding_big5 (coding, detect_info) 3941 detect_coding_big5 (coding, detect_info)
3851 struct coding_system *coding; 3942 struct coding_system *coding;
3852 struct coding_detection_info *detect_info; 3943 struct coding_detection_info *detect_info;
3853 { 3944 {
3854 const unsigned char *src = coding->source, *src_base = src; 3945 const unsigned char *src = coding->source, *src_base;
3855 const unsigned char *src_end = coding->source + coding->src_bytes; 3946 const unsigned char *src_end = coding->source + coding->src_bytes;
3856 int multibytep = coding->src_multibyte; 3947 int multibytep = coding->src_multibyte;
3857 int consumed_chars = 0; 3948 int consumed_chars = 0;
3858 int found = 0; 3949 int found = 0;
3859 int c; 3950 int c;
3860 int incomplete;
3861 3951
3862 detect_info->checked |= CATEGORY_MASK_BIG5; 3952 detect_info->checked |= CATEGORY_MASK_BIG5;
3863 /* A coding system of this category is always ASCII compatible. */ 3953 /* A coding system of this category is always ASCII compatible. */
3864 src += coding->head_ascii; 3954 src += coding->head_ascii;
3865 3955
3866 while (1) 3956 while (1)
3867 { 3957 {
3868 incomplete = 0; 3958 src_base = src;
3869 ONE_MORE_BYTE (c); 3959 ONE_MORE_BYTE (c);
3870 incomplete = 1;
3871 if (c < 0x80) 3960 if (c < 0x80)
3872 continue; 3961 continue;
3873 if (c >= 0xA1) 3962 if (c >= 0xA1)
3874 { 3963 {
3875 ONE_MORE_BYTE (c); 3964 ONE_MORE_BYTE (c);
3882 } 3971 }
3883 detect_info->rejected |= CATEGORY_MASK_BIG5; 3972 detect_info->rejected |= CATEGORY_MASK_BIG5;
3884 return 0; 3973 return 0;
3885 3974
3886 no_more_source: 3975 no_more_source:
3887 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) 3976 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3888 { 3977 {
3889 detect_info->rejected |= CATEGORY_MASK_BIG5; 3978 detect_info->rejected |= CATEGORY_MASK_BIG5;
3890 return 0; 3979 return 0;
3891 } 3980 }
3892 detect_info->found |= found; 3981 detect_info->found |= found;
3930 4019
3931 if (charbuf >= charbuf_end) 4020 if (charbuf >= charbuf_end)
3932 break; 4021 break;
3933 4022
3934 ONE_MORE_BYTE (c); 4023 ONE_MORE_BYTE (c);
3935 4024 if (c < 0)
4025 goto invalid_code;
3936 if (c < 0x80) 4026 if (c < 0x80)
3937 charset = charset_roman; 4027 charset = charset_roman;
3938 else 4028 else
3939 { 4029 {
3940 if (c >= 0xF0) 4030 if (c >= 0xF0)
3973 4063
3974 invalid_code: 4064 invalid_code:
3975 src = src_base; 4065 src = src_base;
3976 consumed_chars = consumed_chars_base; 4066 consumed_chars = consumed_chars_base;
3977 ONE_MORE_BYTE (c); 4067 ONE_MORE_BYTE (c);
3978 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); 4068 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
3979 char_offset++; 4069 char_offset++;
3980 coding->errors++; 4070 coding->errors++;
3981 } 4071 }
3982 4072
3983 no_more_source: 4073 no_more_source:
4021 if (charbuf >= charbuf_end) 4111 if (charbuf >= charbuf_end)
4022 break; 4112 break;
4023 4113
4024 ONE_MORE_BYTE (c); 4114 ONE_MORE_BYTE (c);
4025 4115
4116 if (c < 0)
4117 goto invalid_code;
4026 if (c < 0x80) 4118 if (c < 0x80)
4027 charset = charset_roman; 4119 charset = charset_roman;
4028 else 4120 else
4029 { 4121 {
4030 /* BIG5 -> Big5 */ 4122 /* BIG5 -> Big5 */
4051 4143
4052 invalid_code: 4144 invalid_code:
4053 src = src_base; 4145 src = src_base;
4054 consumed_chars = consumed_chars_base; 4146 consumed_chars = consumed_chars_base;
4055 ONE_MORE_BYTE (c); 4147 ONE_MORE_BYTE (c);
4056 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); 4148 *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4057 char_offset++; 4149 char_offset++;
4058 coding->errors++; 4150 coding->errors++;
4059 } 4151 }
4060 4152
4061 no_more_source: 4153 no_more_source:
4141 EMIT_ONE_BYTE (code | 0x80); 4233 EMIT_ONE_BYTE (code | 0x80);
4142 else 4234 else
4143 EMIT_ONE_ASCII_BYTE (code & 0x7F); 4235 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4144 } 4236 }
4145 } 4237 }
4146 coding->result = CODING_RESULT_SUCCESS; 4238 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4147 coding->produced_char += produced_chars; 4239 coding->produced_char += produced_chars;
4148 coding->produced = dst - coding->destination; 4240 coding->produced = dst - coding->destination;
4149 return 0; 4241 return 0;
4150 } 4242 }
4151 4243
4212 } 4304 }
4213 else 4305 else
4214 EMIT_ONE_ASCII_BYTE (code & 0x7F); 4306 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4215 } 4307 }
4216 } 4308 }
4217 coding->result = CODING_RESULT_SUCCESS; 4309 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4218 coding->produced_char += produced_chars; 4310 coding->produced_char += produced_chars;
4219 coding->produced = dst - coding->destination; 4311 coding->produced = dst - coding->destination;
4220 return 0; 4312 return 0;
4221 } 4313 }
4222 4314
4231 static int 4323 static int
4232 detect_coding_ccl (coding, detect_info) 4324 detect_coding_ccl (coding, detect_info)
4233 struct coding_system *coding; 4325 struct coding_system *coding;
4234 struct coding_detection_info *detect_info; 4326 struct coding_detection_info *detect_info;
4235 { 4327 {
4236 const unsigned char *src = coding->source, *src_base = src; 4328 const unsigned char *src = coding->source, *src_base;
4237 const unsigned char *src_end = coding->source + coding->src_bytes; 4329 const unsigned char *src_end = coding->source + coding->src_bytes;
4238 int multibytep = coding->src_multibyte; 4330 int multibytep = coding->src_multibyte;
4239 int consumed_chars = 0; 4331 int consumed_chars = 0;
4240 int found = 0; 4332 int found = 0;
4241 unsigned char *valids = CODING_CCL_VALIDS (coding); 4333 unsigned char *valids = CODING_CCL_VALIDS (coding);
4250 src += head_ascii; 4342 src += head_ascii;
4251 4343
4252 while (1) 4344 while (1)
4253 { 4345 {
4254 int c; 4346 int c;
4347
4348 src_base = src;
4255 ONE_MORE_BYTE (c); 4349 ONE_MORE_BYTE (c);
4256 if (! valids[c]) 4350 if (c < 0 || ! valids[c])
4257 break; 4351 break;
4258 if ((valids[c] > 1)) 4352 if ((valids[c] > 1))
4259 found = CATEGORY_MASK_CCL; 4353 found = CATEGORY_MASK_CCL;
4260 } 4354 }
4261 detect_info->rejected |= CATEGORY_MASK_CCL; 4355 detect_info->rejected |= CATEGORY_MASK_CCL;
4327 } 4421 }
4328 4422
4329 switch (ccl.status) 4423 switch (ccl.status)
4330 { 4424 {
4331 case CCL_STAT_SUSPEND_BY_SRC: 4425 case CCL_STAT_SUSPEND_BY_SRC:
4332 coding->result = CODING_RESULT_INSUFFICIENT_SRC; 4426 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4333 break; 4427 break;
4334 case CCL_STAT_SUSPEND_BY_DST: 4428 case CCL_STAT_SUSPEND_BY_DST:
4335 break; 4429 break;
4336 case CCL_STAT_QUIT: 4430 case CCL_STAT_QUIT:
4337 case CCL_STAT_INVALID_CMD: 4431 case CCL_STAT_INVALID_CMD:
4338 coding->result = CODING_RESULT_INTERRUPT; 4432 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4339 break; 4433 break;
4340 default: 4434 default:
4341 coding->result = CODING_RESULT_SUCCESS; 4435 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4342 break; 4436 break;
4343 } 4437 }
4344 coding->consumed_char += consumed_chars; 4438 coding->consumed_char += consumed_chars;
4345 coding->consumed = src - coding->source; 4439 coding->consumed = src - coding->source;
4346 coding->charbuf_used = charbuf - coding->charbuf; 4440 coding->charbuf_used = charbuf - coding->charbuf;
4388 } 4482 }
4389 4483
4390 switch (ccl.status) 4484 switch (ccl.status)
4391 { 4485 {
4392 case CCL_STAT_SUSPEND_BY_SRC: 4486 case CCL_STAT_SUSPEND_BY_SRC:
4393 coding->result = CODING_RESULT_INSUFFICIENT_SRC; 4487 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4394 break; 4488 break;
4395 case CCL_STAT_SUSPEND_BY_DST: 4489 case CCL_STAT_SUSPEND_BY_DST:
4396 coding->result = CODING_RESULT_INSUFFICIENT_DST; 4490 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4397 break; 4491 break;
4398 case CCL_STAT_QUIT: 4492 case CCL_STAT_QUIT:
4399 case CCL_STAT_INVALID_CMD: 4493 case CCL_STAT_INVALID_CMD:
4400 coding->result = CODING_RESULT_INTERRUPT; 4494 record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4401 break; 4495 break;
4402 default: 4496 default:
4403 coding->result = CODING_RESULT_SUCCESS; 4497 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4404 break; 4498 break;
4405 } 4499 }
4406 4500
4407 coding->produced_char += produced_chars; 4501 coding->produced_char += produced_chars;
4408 coding->produced = dst - coding->destination; 4502 coding->produced = dst - coding->destination;
4420 struct coding_system *coding; 4514 struct coding_system *coding;
4421 { 4515 {
4422 coding->chars_at_source = 1; 4516 coding->chars_at_source = 1;
4423 coding->consumed_char = 0; 4517 coding->consumed_char = 0;
4424 coding->consumed = 0; 4518 coding->consumed = 0;
4425 coding->result = CODING_RESULT_SUCCESS; 4519 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4426 } 4520 }
4427 4521
4428 static int 4522 static int
4429 encode_coding_raw_text (coding) 4523 encode_coding_raw_text (coding)
4430 struct coding_system *coding; 4524 struct coding_system *coding;
4498 while (charbuf < charbuf_end && dst < dst_end) 4592 while (charbuf < charbuf_end && dst < dst_end)
4499 *dst++ = *charbuf++; 4593 *dst++ = *charbuf++;
4500 produced_chars = dst - (coding->destination + coding->dst_bytes); 4594 produced_chars = dst - (coding->destination + coding->dst_bytes);
4501 } 4595 }
4502 } 4596 }
4503 coding->result = CODING_RESULT_SUCCESS; 4597 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4504 coding->produced_char += produced_chars; 4598 coding->produced_char += produced_chars;
4505 coding->produced = dst - coding->destination; 4599 coding->produced = dst - coding->destination;
4506 return 0; 4600 return 0;
4507 } 4601 }
4508 4602
4513 static int 4607 static int
4514 detect_coding_charset (coding, detect_info) 4608 detect_coding_charset (coding, detect_info)
4515 struct coding_system *coding; 4609 struct coding_system *coding;
4516 struct coding_detection_info *detect_info; 4610 struct coding_detection_info *detect_info;
4517 { 4611 {
4518 const unsigned char *src = coding->source, *src_base = src; 4612 const unsigned char *src = coding->source, *src_base;
4519 const unsigned char *src_end = coding->source + coding->src_bytes; 4613 const unsigned char *src_end = coding->source + coding->src_bytes;
4520 int multibytep = coding->src_multibyte; 4614 int multibytep = coding->src_multibyte;
4521 int consumed_chars = 0; 4615 int consumed_chars = 0;
4522 Lisp_Object attrs, valids; 4616 Lisp_Object attrs, valids;
4523 int found = 0; 4617 int found = 0;
4533 4627
4534 while (1) 4628 while (1)
4535 { 4629 {
4536 int c; 4630 int c;
4537 4631
4632 src_base = src;
4538 ONE_MORE_BYTE (c); 4633 ONE_MORE_BYTE (c);
4634 if (c < 0)
4635 continue;
4539 if (NILP (AREF (valids, c))) 4636 if (NILP (AREF (valids, c)))
4540 break; 4637 break;
4541 if (c >= 0x80) 4638 if (c >= 0x80)
4542 found = CATEGORY_MASK_CHARSET; 4639 found = CATEGORY_MASK_CHARSET;
4543 } 4640 }
4582 4679
4583 if (charbuf >= charbuf_end) 4680 if (charbuf >= charbuf_end)
4584 break; 4681 break;
4585 4682
4586 ONE_MORE_BYTE (c); 4683 ONE_MORE_BYTE (c);
4684 if (c < 0)
4685 goto invalid_code;
4587 code = c; 4686 code = c;
4588 4687
4589 val = AREF (valids, c); 4688 val = AREF (valids, c);
4590 if (NILP (val)) 4689 if (NILP (val))
4591 goto invalid_code; 4690 goto invalid_code;
4641 4740
4642 invalid_code: 4741 invalid_code:
4643 src = src_base; 4742 src = src_base;
4644 consumed_chars = consumed_chars_base; 4743 consumed_chars = consumed_chars_base;
4645 ONE_MORE_BYTE (c); 4744 ONE_MORE_BYTE (c);
4646 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); 4745 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4647 char_offset++; 4746 char_offset++;
4648 coding->errors++; 4747 coding->errors++;
4649 } 4748 }
4650 4749
4651 no_more_source: 4750 no_more_source:
4712 EMIT_ONE_BYTE (c); 4811 EMIT_ONE_BYTE (c);
4713 } 4812 }
4714 } 4813 }
4715 } 4814 }
4716 4815
4717 coding->result = CODING_RESULT_SUCCESS; 4816 record_conversion_result (coding, CODING_RESULT_SUCCESS);
4718 coding->produced_char += produced_chars; 4817 coding->produced_char += produced_chars;
4719 coding->produced = dst - coding->destination; 4818 coding->produced = dst - coding->destination;
4720 return 0; 4819 return 0;
4721 } 4820 }
4722 4821
5478 { 5577 {
5479 if (EQ (eol_type, Qdos)) 5578 if (EQ (eol_type, Qdos))
5480 { 5579 {
5481 if (src == src_end) 5580 if (src == src_end)
5482 { 5581 {
5483 coding->result = CODING_RESULT_INSUFFICIENT_SRC; 5582 record_conversion_result
5583 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5484 goto no_more_source; 5584 goto no_more_source;
5485 } 5585 }
5486 if (*src == '\n') 5586 if (*src == '\n')
5487 c = *src++; 5587 c = *src++;
5488 } 5588 }
5667 break; \ 5767 break; \
5668 size >>= 1; \ 5768 size >>= 1; \
5669 } \ 5769 } \
5670 if (! coding->charbuf) \ 5770 if (! coding->charbuf) \
5671 { \ 5771 { \
5672 coding->result = CODING_RESULT_INSUFFICIENT_MEM; \ 5772 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5673 return coding->result; \ 5773 return coding->result; \
5674 } \ 5774 } \
5675 coding->charbuf_size = size; \ 5775 coding->charbuf_size = size; \
5676 } while (0) 5776 } while (0)
5677 5777
5757 } 5857 }
5758 5858
5759 coding->consumed = coding->consumed_char = 0; 5859 coding->consumed = coding->consumed_char = 0;
5760 coding->produced = coding->produced_char = 0; 5860 coding->produced = coding->produced_char = 0;
5761 coding->chars_at_source = 0; 5861 coding->chars_at_source = 0;
5762 coding->result = CODING_RESULT_SUCCESS; 5862 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5763 coding->errors = 0; 5863 coding->errors = 0;
5764 5864
5765 ALLOC_CONVERSION_WORK_AREA (coding); 5865 ALLOC_CONVERSION_WORK_AREA (coding);
5766 5866
5767 attrs = CODING_ID_ATTRS (coding->id); 5867 attrs = CODING_ID_ATTRS (coding->id);
5796 if (coding->mode & CODING_MODE_LAST_BLOCK) 5896 if (coding->mode & CODING_MODE_LAST_BLOCK)
5797 { 5897 {
5798 /* Flush out unprocessed data as binary chars. We are sure 5898 /* Flush out unprocessed data as binary chars. We are sure
5799 that the number of data is less than the size of 5899 that the number of data is less than the size of
5800 coding->charbuf. */ 5900 coding->charbuf. */
5901 coding->charbuf_used = 0;
5801 while (nbytes-- > 0) 5902 while (nbytes-- > 0)
5802 { 5903 {
5803 int c = *src++; 5904 int c = *src++;
5804 5905
5805 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c); 5906 coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
6074 = ! NILP (current_buffer->enable_multibyte_characters); 6175 = ! NILP (current_buffer->enable_multibyte_characters);
6075 } 6176 }
6076 6177
6077 coding->consumed = coding->consumed_char = 0; 6178 coding->consumed = coding->consumed_char = 0;
6078 coding->produced = coding->produced_char = 0; 6179 coding->produced = coding->produced_char = 0;
6079 coding->result = CODING_RESULT_SUCCESS; 6180 record_conversion_result (coding, CODING_RESULT_SUCCESS);
6080 coding->errors = 0; 6181 coding->errors = 0;
6081 6182
6082 ALLOC_CONVERSION_WORK_AREA (coding); 6183 ALLOC_CONVERSION_WORK_AREA (coding);
6083 6184
6084 do { 6185 do {
6123 { 6224 {
6124 Lisp_Object name, workbuf; 6225 Lisp_Object name, workbuf;
6125 struct buffer *current; 6226 struct buffer *current;
6126 6227
6127 if (reused_workbuf_in_use++) 6228 if (reused_workbuf_in_use++)
6128 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); 6229 {
6230 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6231 workbuf = Fget_buffer_create (name);
6232 }
6129 else 6233 else
6130 name = Vcode_conversion_workbuf_name; 6234 {
6131 workbuf = Fget_buffer_create (name); 6235 name = Vcode_conversion_workbuf_name;
6236 workbuf = Fget_buffer_create (name);
6237 if (NILP (Vcode_conversion_reused_workbuf))
6238 Vcode_conversion_reused_workbuf = workbuf;
6239 }
6132 current = current_buffer; 6240 current = current_buffer;
6133 set_buffer_internal (XBUFFER (workbuf)); 6241 set_buffer_internal (XBUFFER (workbuf));
6134 Ferase_buffer (); 6242 Ferase_buffer ();
6135 current_buffer->undo_list = Qt; 6243 current_buffer->undo_list = Qt;
6136 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; 6244 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6387 { 6495 {
6388 destination 6496 destination
6389 = (unsigned char *) xrealloc (destination, coding->produced); 6497 = (unsigned char *) xrealloc (destination, coding->produced);
6390 if (! destination) 6498 if (! destination)
6391 { 6499 {
6392 coding->result = CODING_RESULT_INSUFFICIENT_DST; 6500 record_conversion_result (coding,
6501 CODING_RESULT_INSUFFICIENT_DST);
6393 unbind_to (count, Qnil); 6502 unbind_to (count, Qnil);
6394 return; 6503 return;
6395 } 6504 }
6396 if (BEGV < GPT && GPT < BEGV + coding->produced_char) 6505 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6397 move_gap_both (BEGV, BEGV_BYTE); 6506 move_gap_both (BEGV, BEGV_BYTE);
6417 else 6526 else
6418 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes), 6527 TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6419 saved_pt_byte + (coding->produced - bytes)); 6528 saved_pt_byte + (coding->produced - bytes));
6420 } 6529 }
6421 6530
6422 unbind_to (count, Qnil); 6531 unbind_to (count, coding->dst_object);
6423 } 6532 }
6424 6533
6425 6534
6426 void 6535 void
6427 encode_coding_object (coding, src_object, from, from_byte, to, to_byte, 6536 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7361 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte, 7470 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7362 dst_object); 7471 dst_object);
7363 if (! norecord) 7472 if (! norecord)
7364 Vlast_coding_system_used = CODING_ID_NAME (coding.id); 7473 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7365 7474
7366 if (coding.result != CODING_RESULT_SUCCESS)
7367 error ("Code conversion error: %d", coding.result);
7368
7369 return (BUFFERP (dst_object) 7475 return (BUFFERP (dst_object)
7370 ? make_number (coding.produced_char) 7476 ? make_number (coding.produced_char)
7371 : coding.dst_object); 7477 : coding.dst_object);
7372 } 7478 }
7373 7479
7450 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); 7556 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7451 else 7557 else
7452 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); 7558 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7453 if (! norecord) 7559 if (! norecord)
7454 Vlast_coding_system_used = CODING_ID_NAME (coding.id); 7560 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7455
7456 if (coding.result != CODING_RESULT_SUCCESS)
7457 error ("Code conversion error: %d", coding.result);
7458 7561
7459 return (BUFFERP (dst_object) 7562 return (BUFFERP (dst_object)
7460 ? make_number (coding.produced_char) 7563 ? make_number (coding.produced_char)
7461 : coding.dst_object); 7564 : coding.dst_object);
7462 } 7565 }
8738 ASET (Vcoding_category_table, coding_category_raw_text, 8841 ASET (Vcoding_category_table, coding_category_raw_text,
8739 intern ("coding-category-raw-text")); 8842 intern ("coding-category-raw-text"));
8740 ASET (Vcoding_category_table, coding_category_undecided, 8843 ASET (Vcoding_category_table, coding_category_undecided,
8741 intern ("coding-category-undecided")); 8844 intern ("coding-category-undecided"));
8742 8845
8846 DEFSYM (Qinsufficient_source, "insufficient-source");
8847 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
8848 DEFSYM (Qinvalid_source, "invalid-source");
8849 DEFSYM (Qinterrupted, "interrupted");
8850 DEFSYM (Qinsufficient_memory, "insufficient-memory");
8851
8743 defsubr (&Scoding_system_p); 8852 defsubr (&Scoding_system_p);
8744 defsubr (&Sread_coding_system); 8853 defsubr (&Sread_coding_system);
8745 defsubr (&Sread_non_nil_coding_system); 8854 defsubr (&Sread_non_nil_coding_system);
8746 defsubr (&Scheck_coding_system); 8855 defsubr (&Scheck_coding_system);
8747 defsubr (&Sdetect_coding_region); 8856 defsubr (&Sdetect_coding_region);
8833 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used, 8942 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8834 doc: /* 8943 doc: /*
8835 Coding system used in the latest file or process I/O. */); 8944 Coding system used in the latest file or process I/O. */);
8836 Vlast_coding_system_used = Qnil; 8945 Vlast_coding_system_used = Qnil;
8837 8946
8947 DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
8948 doc: /*
8949 Error status of the last code conversion.
8950
8951 When an error was detected in the last code conversion, this variable
8952 is set to one of the following symbols.
8953 `insufficient-source'
8954 `inconsistent-eol'
8955 `invalid-source'
8956 `interrupted'
8957 `insufficient-memory'
8958 When no error was detected, the value doesn't change. So, to check
8959 the error status of a code conversion by this variable, you must
8960 explicitly set this variable to nil before performing code
8961 conversion. */);
8962 Vlast_code_conversion_error = Qnil;
8963
8838 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion, 8964 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8839 doc: /* 8965 doc: /*
8840 *Non-nil means always inhibit code conversion of end-of-line format. 8966 *Non-nil means always inhibit code conversion of end-of-line format.
8841 See info node `Coding Systems' and info node `Text and Binary' concerning 8967 See info node `Coding Systems' and info node `Text and Binary' concerning
8842 such conversion. */); 8968 such conversion. */);