Mercurial > emacs
comparison src/coding.c @ 89331:1892a75ffcac
(CATEGORY_MASK_RAW_TEXT): New macro.
(detect_coding_utf_8, detect_coding_utf_16)
(detect_coding_emacs_mule, detect_coding_iso_2022)
(detect_coding_sjis, detect_coding_big5)
(detect_coding_ccl, detect_coding_charset): Change argument MASK
to DETECT_INFO. Update DETECT_INFO and return 1 if the byte
sequence is valid in this coding system. Callers changed.
(MAX_ANNOTATION_LENGTH): New macro.
(ADD_ANNOTATION_DATA): New macro.
(ADD_COMPOSITION_DATA): Argument changed. Callers changed. Call
ADD_ANNOTATION_DATA. The format of annotation data changed.
(ADD_CHARSET_DATA): New macro.
(emacs_mule_char): New argument ID. Callers changed.
(decode_coding_emacs_mule, decode_coding_iso_2022)
(decode_coding_sjis, decode_coding_big5, decode_coding_charset):
Produce charset annotation data in coding->charbuf.
(encode_coding_emacs_mule, encode_coding_iso_2022): Pay attention
to charset annotation data in coding->charbuf.
(setup_coding_system): Add CODING_ANNOTATE_CHARSET_MASK to
coding->common_flags if the coding system is iso-2022 based and
uses designation.
(produce_composition): Adjusted for the new annotation data
format.
(produce_charset): New function.
(produce_annotation): Handle charset annotation.
(handle_composition_annotation, handle_charset_annotation): New
functions.
(consume_chars): Handle charset annotation. Utilize the above two
functions.
(encode_coding_object): If SRC_OBJECT and DST_OBJECT are the same
buffer, get the deleted text as a string and set
coding->src_object to that string.
(detect_coding, detect_coding_system): Use the new struct
coding_detection_info.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 06 Jan 2003 11:37:17 +0000 |
| parents | 1fd77c471ee6 |
| children | 4cc9e57fcabc |
comparison
equal
deleted
inserted
replaced
| 89330:ee0338e83a2b | 89331:1892a75ffcac |
|---|---|
| 142 | 142 |
| 143 | 143 |
| 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions *** | 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions *** |
| 145 | 145 |
| 146 These functions check if a byte sequence specified as a source in | 146 These functions check if a byte sequence specified as a source in |
| 147 CODING conforms to the format of XXX. Return 1 if the data contains | 147 CODING conforms to the format of XXX, and update the members of |
| 148 a byte sequence which can be decoded into non-ASCII characters by | 148 DETECT_INFO. |
| 149 the coding system. Otherwise (i.e. the data contains only ASCII | 149 |
| 150 characters or invalid sequence) return 0. | 150 Return 1 if the byte sequence conforms to XXX, otherwise return 0. |
| 151 | |
| 152 It also resets some bits of an integer pointed by MASK. The macros | |
| 153 CATEGORY_MASK_XXX specifies each bit of this integer. | |
| 154 | 151 |
| 155 Below is the template of these functions. */ | 152 Below is the template of these functions. */ |
| 156 | 153 |
| 157 #if 0 | 154 #if 0 |
| 158 static int | 155 static int |
| 159 detect_coding_XXX (coding, mask) | 156 detect_coding_XXX (coding, detect_info) |
| 160 struct coding_system *coding; | 157 struct coding_system *coding; |
| 161 int *mask; | 158 struct coding_detection_info *detect_info; |
| 162 { | 159 { |
| 163 unsigned char *src = coding->source; | 160 unsigned char *src = coding->source; |
| 164 unsigned char *src_end = coding->source + coding->src_bytes; | 161 unsigned char *src_end = coding->source + coding->src_bytes; |
| 165 int multibytep = coding->src_multibyte; | 162 int multibytep = coding->src_multibyte; |
| 166 int c; | 163 int consumed_chars = 0; |
| 167 int found = 0; | 164 int found = 0; |
| 168 ...; | 165 ...; |
| 169 | 166 |
| 170 while (1) | 167 while (1) |
| 171 { | 168 { |
| 172 /* Get one byte from the source. If the source is exhausted, jump | 169 /* Get one byte from the source. If the source is exhausted, jump |
| 173 to no_more_source:. */ | 170 to no_more_source:. */ |
| 174 ONE_MORE_BYTE (c); | 171 ONE_MORE_BYTE (c); |
| 175 /* Check if it conforms to XXX. If not, break the loop. */ | 172 |
| 176 } | 173 if (! __C_conforms_to_XXX___ (c)) |
| 177 /* As the data is invalid for XXX, reset a proper bits. */ | 174 break; |
| 178 *mask &= ~CODING_CATEGORY_XXX; | 175 if (! __C_strongly_suggests_XXX__ (c)) |
| 176 found = CATEGORY_MASK_XXX; | |
| 177 } | |
| 178 /* The byte sequence is invalid for XXX. */ | |
| 179 detect_info->rejected |= CATEGORY_MASK_XXX; | |
| 179 return 0; | 180 return 0; |
| 181 | |
| 180 no_more_source: | 182 no_more_source: |
| 181 /* The source is exhausted. */ | 183 /* The source is exhausted successfully. */ |
| 182 if (!found) | 184 detect_info->found |= found; |
| 183 /* ASCII characters only. */ | |
| 184 return 0; | |
| 185 /* Some data should be decoded into non-ASCII characters. */ | |
| 186 *mask &= CODING_CATEGORY_XXX; | |
| 187 return 1; | 185 return 1; |
| 188 } | 186 } |
| 189 #endif | 187 #endif |
| 190 | 188 |
| 191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions *** | 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions *** |
| 406 /* Two special coding systems. */ | 404 /* Two special coding systems. */ |
| 407 Lisp_Object Vsjis_coding_system; | 405 Lisp_Object Vsjis_coding_system; |
| 408 Lisp_Object Vbig5_coding_system; | 406 Lisp_Object Vbig5_coding_system; |
| 409 | 407 |
| 410 | 408 |
| 411 static int detect_coding_utf_8 P_ ((struct coding_system *, int *)); | 409 static int detect_coding_utf_8 P_ ((struct coding_system *, |
| 410 struct coding_detection_info *info)); | |
| 412 static void decode_coding_utf_8 P_ ((struct coding_system *)); | 411 static void decode_coding_utf_8 P_ ((struct coding_system *)); |
| 413 static int encode_coding_utf_8 P_ ((struct coding_system *)); | 412 static int encode_coding_utf_8 P_ ((struct coding_system *)); |
| 414 | 413 |
| 415 static int detect_coding_utf_16 P_ ((struct coding_system *, int *)); | 414 static int detect_coding_utf_16 P_ ((struct coding_system *, |
| 415 struct coding_detection_info *info)); | |
| 416 static void decode_coding_utf_16 P_ ((struct coding_system *)); | 416 static void decode_coding_utf_16 P_ ((struct coding_system *)); |
| 417 static int encode_coding_utf_16 P_ ((struct coding_system *)); | 417 static int encode_coding_utf_16 P_ ((struct coding_system *)); |
| 418 | 418 |
| 419 static int detect_coding_iso_2022 P_ ((struct coding_system *, int *)); | 419 static int detect_coding_iso_2022 P_ ((struct coding_system *, |
| 420 struct coding_detection_info *info)); | |
| 420 static void decode_coding_iso_2022 P_ ((struct coding_system *)); | 421 static void decode_coding_iso_2022 P_ ((struct coding_system *)); |
| 421 static int encode_coding_iso_2022 P_ ((struct coding_system *)); | 422 static int encode_coding_iso_2022 P_ ((struct coding_system *)); |
| 422 | 423 |
| 423 static int detect_coding_emacs_mule P_ ((struct coding_system *, int *)); | 424 static int detect_coding_emacs_mule P_ ((struct coding_system *, |
| 425 struct coding_detection_info *info)); | |
| 424 static void decode_coding_emacs_mule P_ ((struct coding_system *)); | 426 static void decode_coding_emacs_mule P_ ((struct coding_system *)); |
| 425 static int encode_coding_emacs_mule P_ ((struct coding_system *)); | 427 static int encode_coding_emacs_mule P_ ((struct coding_system *)); |
| 426 | 428 |
| 427 static int detect_coding_sjis P_ ((struct coding_system *, int *)); | 429 static int detect_coding_sjis P_ ((struct coding_system *, |
| 430 struct coding_detection_info *info)); | |
| 428 static void decode_coding_sjis P_ ((struct coding_system *)); | 431 static void decode_coding_sjis P_ ((struct coding_system *)); |
| 429 static int encode_coding_sjis P_ ((struct coding_system *)); | 432 static int encode_coding_sjis P_ ((struct coding_system *)); |
| 430 | 433 |
| 431 static int detect_coding_big5 P_ ((struct coding_system *, int *)); | 434 static int detect_coding_big5 P_ ((struct coding_system *, |
| 435 struct coding_detection_info *info)); | |
| 432 static void decode_coding_big5 P_ ((struct coding_system *)); | 436 static void decode_coding_big5 P_ ((struct coding_system *)); |
| 433 static int encode_coding_big5 P_ ((struct coding_system *)); | 437 static int encode_coding_big5 P_ ((struct coding_system *)); |
| 434 | 438 |
| 435 static int detect_coding_ccl P_ ((struct coding_system *, int *)); | 439 static int detect_coding_ccl P_ ((struct coding_system *, |
| 440 struct coding_detection_info *info)); | |
| 436 static void decode_coding_ccl P_ ((struct coding_system *)); | 441 static void decode_coding_ccl P_ ((struct coding_system *)); |
| 437 static int encode_coding_ccl P_ ((struct coding_system *)); | 442 static int encode_coding_ccl P_ ((struct coding_system *)); |
| 438 | 443 |
| 439 static void decode_coding_raw_text P_ ((struct coding_system *)); | 444 static void decode_coding_raw_text P_ ((struct coding_system *)); |
| 440 static int encode_coding_raw_text P_ ((struct coding_system *)); | 445 static int encode_coding_raw_text P_ ((struct coding_system *)); |
| 629 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset) | 634 #define CATEGORY_MASK_CHARSET (1 << coding_category_charset) |
| 630 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis) | 635 #define CATEGORY_MASK_SJIS (1 << coding_category_sjis) |
| 631 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5) | 636 #define CATEGORY_MASK_BIG5 (1 << coding_category_big5) |
| 632 #define CATEGORY_MASK_CCL (1 << coding_category_ccl) | 637 #define CATEGORY_MASK_CCL (1 << coding_category_ccl) |
| 633 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule) | 638 #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule) |
| 639 #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text) | |
| 634 | 640 |
| 635 /* This value is returned if detect_coding_mask () finds nothing other | 641 /* This value is returned if detect_coding_mask () finds nothing other |
| 636 than ASCII characters. */ | 642 than ASCII characters. */ |
| 637 #define CATEGORY_MASK_ANY \ | 643 #define CATEGORY_MASK_ANY \ |
| 638 (CATEGORY_MASK_ISO_7 \ | 644 (CATEGORY_MASK_ISO_7 \ |
| 1000 coding_set_destination (coding); | 1006 coding_set_destination (coding); |
| 1001 dst = coding->destination + offset; | 1007 dst = coding->destination + offset; |
| 1002 return dst; | 1008 return dst; |
| 1003 } | 1009 } |
| 1004 | 1010 |
| 1011 /** Macros for annotations. */ | |
| 1012 | |
| 1013 /* Maximum length of annotation data (sum of annotations for | |
| 1014 composition and charset). */ | |
| 1015 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5) | |
| 1016 | |
| 1017 /* An annotation data is stored in the array coding->charbuf in this | |
| 1018 format: | |
| 1019 [ -LENGTH ANNOTATION_MASK FROM TO ... ] | |
| 1020 LENGTH is the number of elements in the annotation. | |
| 1021 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK. | |
| 1022 FROM and TO specify the range of text annotated. They are relative | |
| 1023 to coding->src_pos (on encoding) or coding->dst_pos (on decoding). | |
| 1024 | |
| 1025 The format of the following elements depends on ANNOTATION_MASK. | |
| 1026 | |
| 1027 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements | |
| 1028 follow: | |
| 1029 ... METHOD [ COMPOSITION-COMPONENTS ... ] | |
| 1030 METHOD is one of enum composition_method. | |
| 1031 Optional COMPOSITION-COMPONENTS are characters and composition | |
| 1032 rules. | |
| 1033 | |
| 1034 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID | |
| 1035 follows. */ | |
| 1036 | |
| 1037 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \ | |
| 1038 do { \ | |
| 1039 *(buf)++ = -(len); \ | |
| 1040 *(buf)++ = (mask); \ | |
| 1041 *(buf)++ = (from); \ | |
| 1042 *(buf)++ = (to); \ | |
| 1043 coding->annotated = 1; \ | |
| 1044 } while (0); | |
| 1045 | |
| 1046 #define ADD_COMPOSITION_DATA(buf, from, to, method) \ | |
| 1047 do { \ | |
| 1048 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \ | |
| 1049 *buf++ = method; \ | |
| 1050 } while (0) | |
| 1051 | |
| 1052 | |
| 1053 #define ADD_CHARSET_DATA(buf, from, to, id) \ | |
| 1054 do { \ | |
| 1055 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \ | |
| 1056 *buf++ = id; \ | |
| 1057 } while (0) | |
| 1058 | |
| 1005 | 1059 |
| 1006 /*** 2. Emacs' internal format (emacs-utf-8) ***/ | 1060 /*** 2. Emacs' internal format (emacs-utf-8) ***/ |
| 1007 | 1061 |
| 1008 | 1062 |
| 1009 | 1063 |
| 1010 | 1064 |
| 1011 /*** 3. UTF-8 ***/ | 1065 /*** 3. UTF-8 ***/ |
| 1012 | 1066 |
| 1013 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 1067 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 1014 Check if a text is encoded in UTF-8. If it is, return | 1068 Check if a text is encoded in UTF-8. If it is, return 1, else |
| 1015 CATEGORY_MASK_UTF_8, else return 0. */ | 1069 return 0. */ |
| 1016 | 1070 |
| 1017 #define UTF_8_1_OCTET_P(c) ((c) < 0x80) | 1071 #define UTF_8_1_OCTET_P(c) ((c) < 0x80) |
| 1018 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) | 1072 #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) |
| 1019 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0) | 1073 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0) |
| 1020 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0) | 1074 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0) |
| 1021 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) | 1075 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) |
| 1022 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) | 1076 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) |
| 1023 | 1077 |
| 1024 static int | 1078 static int |
| 1025 detect_coding_utf_8 (coding, mask) | 1079 detect_coding_utf_8 (coding, detect_info) |
| 1026 struct coding_system *coding; | 1080 struct coding_system *coding; |
| 1027 int *mask; | 1081 struct coding_detection_info *detect_info; |
| 1028 { | 1082 { |
| 1029 unsigned char *src = coding->source, *src_base = src; | 1083 unsigned char *src = coding->source, *src_base = src; |
| 1030 unsigned char *src_end = coding->source + coding->src_bytes; | 1084 unsigned char *src_end = coding->source + coding->src_bytes; |
| 1031 int multibytep = coding->src_multibyte; | 1085 int multibytep = coding->src_multibyte; |
| 1032 int consumed_chars = 0; | 1086 int consumed_chars = 0; |
| 1033 int found = 0; | 1087 int found = 0; |
| 1034 int incomplete; | 1088 int incomplete; |
| 1035 | 1089 |
| 1090 detect_info->checked |= CATEGORY_MASK_UTF_8; | |
| 1036 /* A coding system of this category is always ASCII compatible. */ | 1091 /* A coding system of this category is always ASCII compatible. */ |
| 1037 src += coding->head_ascii; | 1092 src += coding->head_ascii; |
| 1038 | 1093 |
| 1039 while (1) | 1094 while (1) |
| 1040 { | 1095 { |
| 1048 ONE_MORE_BYTE (c1); | 1103 ONE_MORE_BYTE (c1); |
| 1049 if (! UTF_8_EXTRA_OCTET_P (c1)) | 1104 if (! UTF_8_EXTRA_OCTET_P (c1)) |
| 1050 break; | 1105 break; |
| 1051 if (UTF_8_2_OCTET_LEADING_P (c)) | 1106 if (UTF_8_2_OCTET_LEADING_P (c)) |
| 1052 { | 1107 { |
| 1053 found++; | 1108 found = CATEGORY_MASK_UTF_8; |
| 1054 continue; | 1109 continue; |
| 1055 } | 1110 } |
| 1056 ONE_MORE_BYTE (c2); | 1111 ONE_MORE_BYTE (c2); |
| 1057 if (! UTF_8_EXTRA_OCTET_P (c2)) | 1112 if (! UTF_8_EXTRA_OCTET_P (c2)) |
| 1058 break; | 1113 break; |
| 1059 if (UTF_8_3_OCTET_LEADING_P (c)) | 1114 if (UTF_8_3_OCTET_LEADING_P (c)) |
| 1060 { | 1115 { |
| 1061 found++; | 1116 found = CATEGORY_MASK_UTF_8; |
| 1062 continue; | 1117 continue; |
| 1063 } | 1118 } |
| 1064 ONE_MORE_BYTE (c3); | 1119 ONE_MORE_BYTE (c3); |
| 1065 if (! UTF_8_EXTRA_OCTET_P (c3)) | 1120 if (! UTF_8_EXTRA_OCTET_P (c3)) |
| 1066 break; | 1121 break; |
| 1067 if (UTF_8_4_OCTET_LEADING_P (c)) | 1122 if (UTF_8_4_OCTET_LEADING_P (c)) |
| 1068 { | 1123 { |
| 1069 found++; | 1124 found = CATEGORY_MASK_UTF_8; |
| 1070 continue; | 1125 continue; |
| 1071 } | 1126 } |
| 1072 ONE_MORE_BYTE (c4); | 1127 ONE_MORE_BYTE (c4); |
| 1073 if (! UTF_8_EXTRA_OCTET_P (c4)) | 1128 if (! UTF_8_EXTRA_OCTET_P (c4)) |
| 1074 break; | 1129 break; |
| 1075 if (UTF_8_5_OCTET_LEADING_P (c)) | 1130 if (UTF_8_5_OCTET_LEADING_P (c)) |
| 1076 { | 1131 { |
| 1077 found++; | 1132 found = CATEGORY_MASK_UTF_8; |
| 1078 continue; | 1133 continue; |
| 1079 } | 1134 } |
| 1080 break; | 1135 break; |
| 1081 } | 1136 } |
| 1082 *mask &= ~CATEGORY_MASK_UTF_8; | 1137 detect_info->rejected |= CATEGORY_MASK_UTF_8; |
| 1083 return 0; | 1138 return 0; |
| 1084 | 1139 |
| 1085 no_more_source: | 1140 no_more_source: |
| 1086 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 1141 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) |
| 1087 { | 1142 { |
| 1088 *mask &= ~CATEGORY_MASK_UTF_8; | 1143 detect_info->rejected |= CATEGORY_MASK_UTF_8; |
| 1089 return 0; | 1144 return 0; |
| 1090 } | 1145 } |
| 1091 return found; | 1146 detect_info->found |= found; |
| 1147 return 1; | |
| 1092 } | 1148 } |
| 1093 | 1149 |
| 1094 | 1150 |
| 1095 static void | 1151 static void |
| 1096 decode_coding_utf_8 (coding) | 1152 decode_coding_utf_8 (coding) |
| 1267 return 0; | 1323 return 0; |
| 1268 } | 1324 } |
| 1269 | 1325 |
| 1270 | 1326 |
| 1271 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 1327 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 1272 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or | 1328 Check if a text is encoded in one of UTF-16 based coding systems. |
| 1273 Little Endian (otherwise). If it is, return | 1329 If it is, return 1, else return 0. */ |
| 1274 CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE, | |
| 1275 else return 0. */ | |
| 1276 | 1330 |
| 1277 #define UTF_16_HIGH_SURROGATE_P(val) \ | 1331 #define UTF_16_HIGH_SURROGATE_P(val) \ |
| 1278 (((val) & 0xFC00) == 0xD800) | 1332 (((val) & 0xFC00) == 0xD800) |
| 1279 | 1333 |
| 1280 #define UTF_16_LOW_SURROGATE_P(val) \ | 1334 #define UTF_16_LOW_SURROGATE_P(val) \ |
| 1285 || ((val) == 0xFFFF) \ | 1339 || ((val) == 0xFFFF) \ |
| 1286 || UTF_16_LOW_SURROGATE_P (val)) | 1340 || UTF_16_LOW_SURROGATE_P (val)) |
| 1287 | 1341 |
| 1288 | 1342 |
| 1289 static int | 1343 static int |
| 1290 detect_coding_utf_16 (coding, mask) | 1344 detect_coding_utf_16 (coding, detect_info) |
| 1291 struct coding_system *coding; | 1345 struct coding_system *coding; |
| 1292 int *mask; | 1346 struct coding_detection_info *detect_info; |
| 1293 { | 1347 { |
| 1294 unsigned char *src = coding->source, *src_base = src; | 1348 unsigned char *src = coding->source, *src_base = src; |
| 1295 unsigned char *src_end = coding->source + coding->src_bytes; | 1349 unsigned char *src_end = coding->source + coding->src_bytes; |
| 1296 int multibytep = coding->src_multibyte; | 1350 int multibytep = coding->src_multibyte; |
| 1297 int consumed_chars = 0; | 1351 int consumed_chars = 0; |
| 1298 int c1, c2; | 1352 int c1, c2; |
| 1299 | 1353 |
| 1300 *mask &= ~CATEGORY_MASK_UTF_16; | 1354 detect_info->checked |= CATEGORY_MASK_UTF_16; |
| 1301 | 1355 |
| 1356 if (coding->mode & CODING_MODE_LAST_BLOCK | |
| 1357 && (coding->src_bytes & 1)) | |
| 1358 { | |
| 1359 detect_info->rejected |= CATEGORY_MASK_UTF_16; | |
| 1360 return 0; | |
| 1361 } | |
| 1302 ONE_MORE_BYTE (c1); | 1362 ONE_MORE_BYTE (c1); |
| 1303 ONE_MORE_BYTE (c2); | 1363 ONE_MORE_BYTE (c2); |
| 1304 | 1364 |
| 1305 if ((c1 == 0xFF) && (c2 == 0xFE)) | 1365 if ((c1 == 0xFF) && (c2 == 0xFE)) |
| 1306 *mask |= CATEGORY_MASK_UTF_16_LE; | 1366 { |
| 1367 detect_info->found |= CATEGORY_MASK_UTF_16_LE; | |
| 1368 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; | |
| 1369 } | |
| 1307 else if ((c1 == 0xFE) && (c2 == 0xFF)) | 1370 else if ((c1 == 0xFE) && (c2 == 0xFF)) |
| 1308 *mask |= CATEGORY_MASK_UTF_16_BE; | 1371 { |
| 1309 else | 1372 detect_info->found |= CATEGORY_MASK_UTF_16_BE; |
| 1310 *mask |= CATEGORY_MASK_UTF_16_BE_NOSIG | CATEGORY_MASK_UTF_16_LE_NOSIG; | 1373 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; |
| 1374 } | |
| 1375 no_more_source: | |
| 1311 return 1; | 1376 return 1; |
| 1312 | |
| 1313 no_more_source: | |
| 1314 return 0; | |
| 1315 } | 1377 } |
| 1316 | 1378 |
| 1317 static void | 1379 static void |
| 1318 decode_coding_utf_16 (coding) | 1380 decode_coding_utf_16 (coding) |
| 1319 struct coding_system *coding; | 1381 struct coding_system *coding; |
| 1557 */ | 1619 */ |
| 1558 | 1620 |
| 1559 char emacs_mule_bytes[256]; | 1621 char emacs_mule_bytes[256]; |
| 1560 | 1622 |
| 1561 int | 1623 int |
| 1562 emacs_mule_char (coding, src, nbytes, nchars) | 1624 emacs_mule_char (coding, src, nbytes, nchars, id) |
| 1563 struct coding_system *coding; | 1625 struct coding_system *coding; |
| 1564 unsigned char *src; | 1626 unsigned char *src; |
| 1565 int *nbytes, *nchars; | 1627 int *nbytes, *nchars, *id; |
| 1566 { | 1628 { |
| 1567 unsigned char *src_end = coding->source + coding->src_bytes; | 1629 unsigned char *src_end = coding->source + coding->src_bytes; |
| 1568 int multibytep = coding->src_multibyte; | 1630 int multibytep = coding->src_multibyte; |
| 1569 unsigned char *src_base = src; | 1631 unsigned char *src_base = src; |
| 1570 struct charset *charset; | 1632 struct charset *charset; |
| 1625 c = DECODE_CHAR (charset, code); | 1687 c = DECODE_CHAR (charset, code); |
| 1626 if (c < 0) | 1688 if (c < 0) |
| 1627 goto invalid_code; | 1689 goto invalid_code; |
| 1628 *nbytes = src - src_base; | 1690 *nbytes = src - src_base; |
| 1629 *nchars = consumed_chars; | 1691 *nchars = consumed_chars; |
| 1692 if (id) | |
| 1693 *id = charset->id; | |
| 1630 return c; | 1694 return c; |
| 1631 | 1695 |
| 1632 no_more_source: | 1696 no_more_source: |
| 1633 return -2; | 1697 return -2; |
| 1634 | 1698 |
| 1636 return -1; | 1700 return -1; |
| 1637 } | 1701 } |
| 1638 | 1702 |
| 1639 | 1703 |
| 1640 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 1704 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 1641 Check if a text is encoded in `emacs-mule'. */ | 1705 Check if a text is encoded in `emacs-mule'. If it is, return 1, |
| 1706 else return 0. */ | |
| 1642 | 1707 |
| 1643 static int | 1708 static int |
| 1644 detect_coding_emacs_mule (coding, mask) | 1709 detect_coding_emacs_mule (coding, detect_info) |
| 1645 struct coding_system *coding; | 1710 struct coding_system *coding; |
| 1646 int *mask; | 1711 struct coding_detection_info *detect_info; |
| 1647 { | 1712 { |
| 1648 unsigned char *src = coding->source, *src_base = src; | 1713 unsigned char *src = coding->source, *src_base = src; |
| 1649 unsigned char *src_end = coding->source + coding->src_bytes; | 1714 unsigned char *src_end = coding->source + coding->src_bytes; |
| 1650 int multibytep = coding->src_multibyte; | 1715 int multibytep = coding->src_multibyte; |
| 1651 int consumed_chars = 0; | 1716 int consumed_chars = 0; |
| 1652 int c; | 1717 int c; |
| 1653 int found = 0; | 1718 int found = 0; |
| 1654 int incomplete; | 1719 int incomplete; |
| 1655 | 1720 |
| 1721 detect_info->checked |= CATEGORY_MASK_EMACS_MULE; | |
| 1656 /* A coding system of this category is always ASCII compatible. */ | 1722 /* A coding system of this category is always ASCII compatible. */ |
| 1657 src += coding->head_ascii; | 1723 src += coding->head_ascii; |
| 1658 | 1724 |
| 1659 while (1) | 1725 while (1) |
| 1660 { | 1726 { |
| 1678 } | 1744 } |
| 1679 while (c >= 0xA0); | 1745 while (c >= 0xA0); |
| 1680 | 1746 |
| 1681 if (src - src_base <= 4) | 1747 if (src - src_base <= 4) |
| 1682 break; | 1748 break; |
| 1683 found = 1; | 1749 found = CATEGORY_MASK_EMACS_MULE; |
| 1684 if (c == 0x80) | 1750 if (c == 0x80) |
| 1685 goto repeat; | 1751 goto repeat; |
| 1686 } | 1752 } |
| 1687 | 1753 |
| 1688 if (c < 0x80) | 1754 if (c < 0x80) |
| 1700 ONE_MORE_BYTE (c); | 1766 ONE_MORE_BYTE (c); |
| 1701 } | 1767 } |
| 1702 while (c >= 0xA0); | 1768 while (c >= 0xA0); |
| 1703 if (src - src_base != emacs_mule_bytes[*src_base]) | 1769 if (src - src_base != emacs_mule_bytes[*src_base]) |
| 1704 break; | 1770 break; |
| 1705 found = 1; | 1771 found = CATEGORY_MASK_EMACS_MULE; |
| 1706 } | 1772 } |
| 1707 } | 1773 } |
| 1708 *mask &= ~CATEGORY_MASK_EMACS_MULE; | 1774 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; |
| 1709 return 0; | 1775 return 0; |
| 1710 | 1776 |
| 1711 no_more_source: | 1777 no_more_source: |
| 1712 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 1778 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) |
| 1713 { | 1779 { |
| 1714 *mask &= ~CATEGORY_MASK_EMACS_MULE; | 1780 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; |
| 1715 return 0; | 1781 return 0; |
| 1716 } | 1782 } |
| 1717 return found; | 1783 detect_info->found |= found; |
| 1784 return 1; | |
| 1718 } | 1785 } |
| 1719 | 1786 |
| 1720 | 1787 |
| 1721 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 1788 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 1722 | 1789 |
| 1733 int c; \ | 1800 int c; \ |
| 1734 int nbytes, nchars; \ | 1801 int nbytes, nchars; \ |
| 1735 \ | 1802 \ |
| 1736 if (src == src_end) \ | 1803 if (src == src_end) \ |
| 1737 break; \ | 1804 break; \ |
| 1738 c = emacs_mule_char (coding, src, &nbytes, &nchars); \ | 1805 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\ |
| 1739 if (c < 0) \ | 1806 if (c < 0) \ |
| 1740 { \ | 1807 { \ |
| 1741 if (c == -2) \ | 1808 if (c == -2) \ |
| 1742 break; \ | 1809 break; \ |
| 1743 goto invalid_code; \ | 1810 goto invalid_code; \ |
| 1790 goto invalid_code; \ | 1857 goto invalid_code; \ |
| 1791 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ | 1858 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ |
| 1792 } while (0) | 1859 } while (0) |
| 1793 | 1860 |
| 1794 | 1861 |
| 1795 #define ADD_COMPOSITION_DATA(buf, method, nchars) \ | |
| 1796 do { \ | |
| 1797 *buf++ = -5; \ | |
| 1798 *buf++ = coding->produced_char + char_offset; \ | |
| 1799 *buf++ = CODING_ANNOTATE_COMPOSITION_MASK; \ | |
| 1800 *buf++ = method; \ | |
| 1801 *buf++ = nchars; \ | |
| 1802 } while (0) | |
| 1803 | |
| 1804 | |
| 1805 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ | 1862 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ |
| 1806 do { \ | 1863 do { \ |
| 1807 /* Emacs 21 style format. The first three bytes at SRC are \ | 1864 /* Emacs 21 style format. The first three bytes at SRC are \ |
| 1808 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ | 1865 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ |
| 1809 the byte length of this composition information, CHARS is the \ | 1866 the byte length of this composition information, CHARS is the \ |
| 1810 number of characters composed by this composition. */ \ | 1867 number of characters composed by this composition. */ \ |
| 1811 enum composition_method method = c - 0xF2; \ | 1868 enum composition_method method = c - 0xF2; \ |
| 1812 int *charbuf_base = charbuf; \ | 1869 int *charbuf_base = charbuf; \ |
| 1870 int from, to; \ | |
| 1813 int consumed_chars_limit; \ | 1871 int consumed_chars_limit; \ |
| 1814 int nbytes, nchars; \ | 1872 int nbytes, nchars; \ |
| 1815 \ | 1873 \ |
| 1816 ONE_MORE_BYTE (c); \ | 1874 ONE_MORE_BYTE (c); \ |
| 1817 nbytes = c - 0xA0; \ | 1875 nbytes = c - 0xA0; \ |
| 1818 if (nbytes < 3) \ | 1876 if (nbytes < 3) \ |
| 1819 goto invalid_code; \ | 1877 goto invalid_code; \ |
| 1820 ONE_MORE_BYTE (c); \ | 1878 ONE_MORE_BYTE (c); \ |
| 1821 nchars = c - 0xA0; \ | 1879 nchars = c - 0xA0; \ |
| 1822 ADD_COMPOSITION_DATA (charbuf, method, nchars); \ | 1880 from = coding->produced + char_offset; \ |
| 1881 to = from + nchars; \ | |
| 1882 ADD_COMPOSITION_DATA (charbuf, from, to, method); \ | |
| 1823 consumed_chars_limit = consumed_chars_base + nbytes; \ | 1883 consumed_chars_limit = consumed_chars_base + nbytes; \ |
| 1824 if (method != COMPOSITION_RELATIVE) \ | 1884 if (method != COMPOSITION_RELATIVE) \ |
| 1825 { \ | 1885 { \ |
| 1826 int i = 0; \ | 1886 int i = 0; \ |
| 1827 while (consumed_chars < consumed_chars_limit) \ | 1887 while (consumed_chars < consumed_chars_limit) \ |
| 1841 | 1901 |
| 1842 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ | 1902 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ |
| 1843 do { \ | 1903 do { \ |
| 1844 /* Emacs 20 style format for relative composition. */ \ | 1904 /* Emacs 20 style format for relative composition. */ \ |
| 1845 /* Store multibyte form of characters to be composed. */ \ | 1905 /* Store multibyte form of characters to be composed. */ \ |
| 1906 enum composition_method method = COMPOSITION_RELATIVE; \ | |
| 1846 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ | 1907 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ |
| 1847 int *buf = components; \ | 1908 int *buf = components; \ |
| 1848 int i, j; \ | 1909 int i, j; \ |
| 1910 int from, to; \ | |
| 1849 \ | 1911 \ |
| 1850 src = src_base; \ | 1912 src = src_base; \ |
| 1851 ONE_MORE_BYTE (c); /* skip 0x80 */ \ | 1913 ONE_MORE_BYTE (c); /* skip 0x80 */ \ |
| 1852 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ | 1914 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ |
| 1853 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 1915 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ |
| 1854 if (i < 2) \ | 1916 if (i < 2) \ |
| 1855 goto invalid_code; \ | 1917 goto invalid_code; \ |
| 1856 ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i); \ | 1918 from = coding->produced_char + char_offset; \ |
| 1919 to = from + i; \ | |
| 1920 ADD_COMPOSITION_DATA (charbuf, from, to, method); \ | |
| 1857 for (j = 0; j < i; j++) \ | 1921 for (j = 0; j < i; j++) \ |
| 1858 *charbuf++ = components[j]; \ | 1922 *charbuf++ = components[j]; \ |
| 1859 } while (0) | 1923 } while (0) |
| 1860 | 1924 |
| 1861 | 1925 |
| 1862 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \ | 1926 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \ |
| 1863 do { \ | 1927 do { \ |
| 1864 /* Emacs 20 style format for rule-base composition. */ \ | 1928 /* Emacs 20 style format for rule-base composition. */ \ |
| 1865 /* Store multibyte form of characters to be composed. */ \ | 1929 /* Store multibyte form of characters to be composed. */ \ |
| 1930 enum composition_method method = COMPOSITION_WITH_RULE; \ | |
| 1866 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ | 1931 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ |
| 1867 int *buf = components; \ | 1932 int *buf = components; \ |
| 1868 int i, j; \ | 1933 int i, j; \ |
| 1934 int from, to; \ | |
| 1869 \ | 1935 \ |
| 1870 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 1936 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ |
| 1871 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ | 1937 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ |
| 1872 { \ | 1938 { \ |
| 1873 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ | 1939 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ |
| 1875 } \ | 1941 } \ |
| 1876 if (i < 1 || (buf - components) % 2 == 0) \ | 1942 if (i < 1 || (buf - components) % 2 == 0) \ |
| 1877 goto invalid_code; \ | 1943 goto invalid_code; \ |
| 1878 if (charbuf + i + (i / 2) + 1 < charbuf_end) \ | 1944 if (charbuf + i + (i / 2) + 1 < charbuf_end) \ |
| 1879 goto no_more_source; \ | 1945 goto no_more_source; \ |
| 1880 ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i); \ | 1946 from = coding->produced_char + char_offset; \ |
| 1947 to = from + i; \ | |
| 1948 ADD_COMPOSITION_DATA (buf, from, to, method); \ | |
| 1881 for (j = 0; j < i; j++) \ | 1949 for (j = 0; j < i; j++) \ |
| 1882 *charbuf++ = components[j]; \ | 1950 *charbuf++ = components[j]; \ |
| 1883 for (j = 0; j < i; j += 2) \ | 1951 for (j = 0; j < i; j += 2) \ |
| 1884 *charbuf++ = components[j]; \ | 1952 *charbuf++ = components[j]; \ |
| 1885 } while (0) | 1953 } while (0) |
| 1891 { | 1959 { |
| 1892 unsigned char *src = coding->source + coding->consumed; | 1960 unsigned char *src = coding->source + coding->consumed; |
| 1893 unsigned char *src_end = coding->source + coding->src_bytes; | 1961 unsigned char *src_end = coding->source + coding->src_bytes; |
| 1894 unsigned char *src_base; | 1962 unsigned char *src_base; |
| 1895 int *charbuf = coding->charbuf; | 1963 int *charbuf = coding->charbuf; |
| 1896 int *charbuf_end = charbuf + coding->charbuf_size; | 1964 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
| 1897 int consumed_chars = 0, consumed_chars_base; | 1965 int consumed_chars = 0, consumed_chars_base; |
| 1898 int char_offset = 0; | |
| 1899 int multibytep = coding->src_multibyte; | 1966 int multibytep = coding->src_multibyte; |
| 1900 Lisp_Object attrs, eol_type, charset_list; | 1967 Lisp_Object attrs, eol_type, charset_list; |
| 1968 int char_offset = coding->produced_char; | |
| 1969 int last_offset = char_offset; | |
| 1970 int last_id = charset_ascii; | |
| 1901 | 1971 |
| 1902 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 1972 CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 1903 | 1973 |
| 1904 while (1) | 1974 while (1) |
| 1905 { | 1975 { |
| 1933 *charbuf++ = c; | 2003 *charbuf++ = c; |
| 1934 char_offset++; | 2004 char_offset++; |
| 1935 } | 2005 } |
| 1936 else if (c == 0x80) | 2006 else if (c == 0x80) |
| 1937 { | 2007 { |
| 1938 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end) | |
| 1939 break; | |
| 1940 ONE_MORE_BYTE (c); | 2008 ONE_MORE_BYTE (c); |
| 1941 if (c - 0xF2 >= COMPOSITION_RELATIVE | 2009 if (c - 0xF2 >= COMPOSITION_RELATIVE |
| 1942 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) | 2010 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) |
| 1943 DECODE_EMACS_MULE_21_COMPOSITION (c); | 2011 DECODE_EMACS_MULE_21_COMPOSITION (c); |
| 1944 else if (c < 0xC0) | 2012 else if (c < 0xC0) |
| 1945 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); | 2013 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); |
| 1946 else if (c == 0xFF) | 2014 else if (c == 0xFF) |
| 1947 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); | 2015 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); |
| 1948 else | 2016 else |
| 1949 goto invalid_code; | 2017 goto invalid_code; |
| 1950 coding->annotated = 1; | |
| 1951 } | 2018 } |
| 1952 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) | 2019 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) |
| 1953 { | 2020 { |
| 1954 int nbytes, nchars; | 2021 int nbytes, nchars; |
| 2022 int id; | |
| 2023 | |
| 1955 src = src_base; | 2024 src = src_base; |
| 1956 consumed_chars = consumed_chars_base; | 2025 consumed_chars = consumed_chars_base; |
| 1957 c = emacs_mule_char (coding, src, &nbytes, &nchars); | 2026 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id); |
| 1958 if (c < 0) | 2027 if (c < 0) |
| 1959 { | 2028 { |
| 1960 if (c == -2) | 2029 if (c == -2) |
| 1961 break; | 2030 break; |
| 1962 goto invalid_code; | 2031 goto invalid_code; |
| 1963 } | 2032 } |
| 2033 if (last_id != id) | |
| 2034 { | |
| 2035 if (last_id != charset_ascii) | |
| 2036 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 2037 last_id = id; | |
| 2038 last_offset = char_offset; | |
| 2039 } | |
| 1964 *charbuf++ = c; | 2040 *charbuf++ = c; |
| 1965 src += nbytes; | 2041 src += nbytes; |
| 1966 consumed_chars += nchars; | 2042 consumed_chars += nchars; |
| 1967 char_offset++; | 2043 char_offset++; |
| 1968 } | 2044 } |
| 1971 invalid_code: | 2047 invalid_code: |
| 1972 src = src_base; | 2048 src = src_base; |
| 1973 consumed_chars = consumed_chars_base; | 2049 consumed_chars = consumed_chars_base; |
| 1974 ONE_MORE_BYTE (c); | 2050 ONE_MORE_BYTE (c); |
| 1975 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 2051 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 2052 char_offset++; | |
| 1976 coding->errors++; | 2053 coding->errors++; |
| 1977 } | 2054 } |
| 1978 | 2055 |
| 1979 no_more_source: | 2056 no_more_source: |
| 2057 if (last_id != charset_ascii) | |
| 2058 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 1980 coding->consumed_char += consumed_chars_base; | 2059 coding->consumed_char += consumed_chars_base; |
| 1981 coding->consumed = src_base - coding->source; | 2060 coding->consumed = src_base - coding->source; |
| 1982 coding->charbuf_used = charbuf - coding->charbuf; | 2061 coding->charbuf_used = charbuf - coding->charbuf; |
| 1983 } | 2062 } |
| 1984 | 2063 |
| 2009 unsigned char *dst_end = coding->destination + coding->dst_bytes; | 2088 unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 2010 int safe_room = 8; | 2089 int safe_room = 8; |
| 2011 int produced_chars = 0; | 2090 int produced_chars = 0; |
| 2012 Lisp_Object attrs, eol_type, charset_list; | 2091 Lisp_Object attrs, eol_type, charset_list; |
| 2013 int c; | 2092 int c; |
| 2093 int preferred_charset_id = -1; | |
| 2014 | 2094 |
| 2015 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 2095 CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 2016 | 2096 |
| 2017 while (charbuf < charbuf_end) | 2097 while (charbuf < charbuf_end) |
| 2018 { | 2098 { |
| 2019 ASSURE_DESTINATION (safe_room); | 2099 ASSURE_DESTINATION (safe_room); |
| 2020 c = *charbuf++; | 2100 c = *charbuf++; |
| 2101 | |
| 2102 if (c < 0) | |
| 2103 { | |
| 2104 /* Handle an annotation. */ | |
| 2105 switch (*charbuf) | |
| 2106 { | |
| 2107 case CODING_ANNOTATE_COMPOSITION_MASK: | |
| 2108 /* Not yet implemented. */ | |
| 2109 break; | |
| 2110 case CODING_ANNOTATE_CHARSET_MASK: | |
| 2111 preferred_charset_id = charbuf[3]; | |
| 2112 if (preferred_charset_id >= 0 | |
| 2113 && NILP (Fmemq (make_number (preferred_charset_id), | |
| 2114 charset_list))) | |
| 2115 preferred_charset_id = -1; | |
| 2116 break; | |
| 2117 default: | |
| 2118 abort (); | |
| 2119 } | |
| 2120 charbuf += -c - 1; | |
| 2121 continue; | |
| 2122 } | |
| 2123 | |
| 2021 if (ASCII_CHAR_P (c)) | 2124 if (ASCII_CHAR_P (c)) |
| 2022 EMIT_ONE_ASCII_BYTE (c); | 2125 EMIT_ONE_ASCII_BYTE (c); |
| 2023 else if (CHAR_BYTE8_P (c)) | 2126 else if (CHAR_BYTE8_P (c)) |
| 2024 { | 2127 { |
| 2025 c = CHAR_TO_BYTE8 (c); | 2128 c = CHAR_TO_BYTE8 (c); |
| 2031 unsigned code; | 2134 unsigned code; |
| 2032 int dimension; | 2135 int dimension; |
| 2033 int emacs_mule_id; | 2136 int emacs_mule_id; |
| 2034 unsigned char leading_codes[2]; | 2137 unsigned char leading_codes[2]; |
| 2035 | 2138 |
| 2036 charset = char_charset (c, charset_list, &code); | 2139 if (preferred_charset_id >= 0) |
| 2140 { | |
| 2141 charset = CHARSET_FROM_ID (preferred_charset_id); | |
| 2142 if (! CHAR_CHARSET_P (c, charset)) | |
| 2143 charset = char_charset (c, charset_list, NULL); | |
| 2144 } | |
| 2145 else | |
| 2146 charset = char_charset (c, charset_list, &code); | |
| 2037 if (! charset) | 2147 if (! charset) |
| 2038 { | 2148 { |
| 2039 c = coding->default_char; | 2149 c = coding->default_char; |
| 2040 if (ASCII_CHAR_P (c)) | 2150 if (ASCII_CHAR_P (c)) |
| 2041 { | 2151 { |
| 2317 ASET (attrs, coding_attr_safe_charsets, safe_charsets); | 2427 ASET (attrs, coding_attr_safe_charsets, safe_charsets); |
| 2318 } | 2428 } |
| 2319 | 2429 |
| 2320 | 2430 |
| 2321 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 2322 Check if a text is encoded in ISO2022. If it is, returns an | 2432 Check if a text is encoded in one of ISO-2022 based coding systems. |
| 2323 integer in which appropriate flag bits any of: | 2433 If it is, return 1, else return 0. */ |
| 2324 CATEGORY_MASK_ISO_7 | |
| 2325 CATEGORY_MASK_ISO_7_TIGHT | |
| 2326 CATEGORY_MASK_ISO_8_1 | |
| 2327 CATEGORY_MASK_ISO_8_2 | |
| 2328 CATEGORY_MASK_ISO_7_ELSE | |
| 2329 CATEGORY_MASK_ISO_8_ELSE | |
| 2330 are set. If a code which should never appear in ISO2022 is found, | |
| 2331 returns 0. */ | |
| 2332 | 2434 |
| 2333 static int | 2435 static int |
| 2334 detect_coding_iso_2022 (coding, mask) | 2436 detect_coding_iso_2022 (coding, detect_info) |
| 2335 struct coding_system *coding; | 2437 struct coding_system *coding; |
| 2336 int *mask; | 2438 struct coding_detection_info *detect_info; |
| 2337 { | 2439 { |
| 2338 unsigned char *src = coding->source, *src_base = src; | 2440 unsigned char *src = coding->source, *src_base = src; |
| 2339 unsigned char *src_end = coding->source + coding->src_bytes; | 2441 unsigned char *src_end = coding->source + coding->src_bytes; |
| 2340 int multibytep = coding->src_multibyte; | 2442 int multibytep = coding->src_multibyte; |
| 2341 int mask_iso = CATEGORY_MASK_ISO; | 2443 int single_shifting = 0; |
| 2342 int mask_found = 0, mask_8bit_found = 0; | |
| 2343 int reg[4], shift_out = 0, single_shifting = 0; | |
| 2344 int id; | 2444 int id; |
| 2345 int c, c1; | 2445 int c, c1; |
| 2346 int consumed_chars = 0; | 2446 int consumed_chars = 0; |
| 2347 int i; | 2447 int i; |
| 2448 int rejected = 0; | |
| 2449 int found = 0; | |
| 2450 | |
| 2451 detect_info->checked |= CATEGORY_MASK_ISO; | |
| 2348 | 2452 |
| 2349 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++) | 2453 for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++) |
| 2350 { | 2454 { |
| 2351 struct coding_system *this = &(coding_categories[i]); | 2455 struct coding_system *this = &(coding_categories[i]); |
| 2352 Lisp_Object attrs, val; | 2456 Lisp_Object attrs, val; |
| 2361 } | 2465 } |
| 2362 | 2466 |
| 2363 /* A coding system of this category is always ASCII compatible. */ | 2467 /* A coding system of this category is always ASCII compatible. */ |
| 2364 src += coding->head_ascii; | 2468 src += coding->head_ascii; |
| 2365 | 2469 |
| 2366 reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1; | 2470 while (rejected != CATEGORY_MASK_ISO) |
| 2367 while (mask_iso && src < src_end) | |
| 2368 { | 2471 { |
| 2369 ONE_MORE_BYTE (c); | 2472 ONE_MORE_BYTE (c); |
| 2370 switch (c) | 2473 switch (c) |
| 2371 { | 2474 { |
| 2372 case ISO_CODE_ESC: | 2475 case ISO_CODE_ESC: |
| 2380 ONE_MORE_BYTE (c1); | 2483 ONE_MORE_BYTE (c1); |
| 2381 if (c1 < ' ' || c1 >= 0x80 | 2484 if (c1 < ' ' || c1 >= 0x80 |
| 2382 || (id = iso_charset_table[0][c >= ','][c1]) < 0) | 2485 || (id = iso_charset_table[0][c >= ','][c1]) < 0) |
| 2383 /* Invalid designation sequence. Just ignore. */ | 2486 /* Invalid designation sequence. Just ignore. */ |
| 2384 break; | 2487 break; |
| 2385 reg[(c - '(') % 4] = id; | |
| 2386 } | 2488 } |
| 2387 else if (c == '$') | 2489 else if (c == '$') |
| 2388 { | 2490 { |
| 2389 /* Designation sequence for a charset of dimension 2. */ | 2491 /* Designation sequence for a charset of dimension 2. */ |
| 2390 ONE_MORE_BYTE (c); | 2492 ONE_MORE_BYTE (c); |
| 2391 if (c >= '@' && c <= 'B') | 2493 if (c >= '@' && c <= 'B') |
| 2392 /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | 2494 /* Designation for JISX0208.1978, GB2312, or JISX0208. */ |
| 2393 reg[0] = id = iso_charset_table[1][0][c]; | 2495 id = iso_charset_table[1][0][c]; |
| 2394 else if (c >= '(' && c <= '/') | 2496 else if (c >= '(' && c <= '/') |
| 2395 { | 2497 { |
| 2396 ONE_MORE_BYTE (c1); | 2498 ONE_MORE_BYTE (c1); |
| 2397 if (c1 < ' ' || c1 >= 0x80 | 2499 if (c1 < ' ' || c1 >= 0x80 |
| 2398 || (id = iso_charset_table[1][c >= ','][c1]) < 0) | 2500 || (id = iso_charset_table[1][c >= ','][c1]) < 0) |
| 2399 /* Invalid designation sequence. Just ignore. */ | 2501 /* Invalid designation sequence. Just ignore. */ |
| 2400 break; | 2502 break; |
| 2401 reg[(c - '(') % 4] = id; | |
| 2402 } | 2503 } |
| 2403 else | 2504 else |
| 2404 /* Invalid designation sequence. Just ignore. */ | 2505 /* Invalid designation sequence. Just ignore it. */ |
| 2405 break; | 2506 break; |
| 2406 } | 2507 } |
| 2407 else if (c == 'N' || c == 'O') | 2508 else if (c == 'N' || c == 'O') |
| 2408 { | 2509 { |
| 2409 /* ESC <Fe> for SS2 or SS3. */ | 2510 /* ESC <Fe> for SS2 or SS3. */ |
| 2410 mask_iso &= CATEGORY_MASK_ISO_7_ELSE; | 2511 single_shifting = 1; |
| 2512 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; | |
| 2411 break; | 2513 break; |
| 2412 } | 2514 } |
| 2413 else if (c >= '0' && c <= '4') | 2515 else if (c >= '0' && c <= '4') |
| 2414 { | 2516 { |
| 2415 /* ESC <Fp> for start/end composition. */ | 2517 /* ESC <Fp> for start/end composition. */ |
| 2416 mask_found |= CATEGORY_MASK_ISO; | 2518 found |= CATEGORY_MASK_ISO; |
| 2417 break; | 2519 break; |
| 2418 } | 2520 } |
| 2419 else | 2521 else |
| 2420 { | 2522 { |
| 2421 /* Invalid escape sequence. */ | 2523 /* Invalid escape sequence. Just ignore it. */ |
| 2422 mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE; | |
| 2423 break; | 2524 break; |
| 2424 } | 2525 } |
| 2425 | 2526 |
| 2426 /* We found a valid designation sequence for CHARSET. */ | 2527 /* We found a valid designation sequence for CHARSET. */ |
| 2427 mask_iso &= ~CATEGORY_MASK_ISO_8BIT; | 2528 rejected |= CATEGORY_MASK_ISO_8BIT; |
| 2428 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], | 2529 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], |
| 2429 id)) | 2530 id)) |
| 2430 mask_found |= CATEGORY_MASK_ISO_7; | 2531 found |= CATEGORY_MASK_ISO_7; |
| 2431 else | 2532 else |
| 2432 mask_iso &= ~CATEGORY_MASK_ISO_7; | 2533 rejected |= CATEGORY_MASK_ISO_7; |
| 2433 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], | 2534 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], |
| 2434 id)) | 2535 id)) |
| 2435 mask_found |= CATEGORY_MASK_ISO_7_TIGHT; | 2536 found |= CATEGORY_MASK_ISO_7_TIGHT; |
| 2436 else | 2537 else |
| 2437 mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT; | 2538 rejected |= CATEGORY_MASK_ISO_7_TIGHT; |
| 2438 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], | 2539 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], |
| 2439 id)) | 2540 id)) |
| 2440 mask_found |= CATEGORY_MASK_ISO_7_ELSE; | 2541 found |= CATEGORY_MASK_ISO_7_ELSE; |
| 2441 else | 2542 else |
| 2442 mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE; | 2543 rejected |= CATEGORY_MASK_ISO_7_ELSE; |
| 2443 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], | 2544 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], |
| 2444 id)) | 2545 id)) |
| 2445 mask_found |= CATEGORY_MASK_ISO_8_ELSE; | 2546 found |= CATEGORY_MASK_ISO_8_ELSE; |
| 2446 else | 2547 else |
| 2447 mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE; | 2548 rejected |= CATEGORY_MASK_ISO_8_ELSE; |
| 2448 break; | 2549 break; |
| 2449 | 2550 |
| 2450 case ISO_CODE_SO: | 2551 case ISO_CODE_SO: |
| 2552 case ISO_CODE_SI: | |
| 2553 /* Locking shift out/in. */ | |
| 2451 if (inhibit_iso_escape_detection) | 2554 if (inhibit_iso_escape_detection) |
| 2452 break; | 2555 break; |
| 2453 single_shifting = 0; | 2556 single_shifting = 0; |
| 2454 if (shift_out == 0 | 2557 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; |
| 2455 && (reg[1] >= 0 | 2558 found |= CATEGORY_MASK_ISO_ELSE; |
| 2456 || SHIFT_OUT_OK (coding_category_iso_7_else) | |
| 2457 || SHIFT_OUT_OK (coding_category_iso_8_else))) | |
| 2458 { | |
| 2459 /* Locking shift out. */ | |
| 2460 mask_iso &= ~CATEGORY_MASK_ISO_7BIT; | |
| 2461 mask_found |= CATEGORY_MASK_ISO_ELSE; | |
| 2462 } | |
| 2463 break; | 2559 break; |
| 2464 | 2560 |
| 2465 case ISO_CODE_SI: | 2561 case ISO_CODE_CSI: |
| 2562 /* Control sequence introducer. */ | |
| 2563 single_shifting = 0; | |
| 2564 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE; | |
| 2565 found |= CATEGORY_MASK_ISO_8_ELSE; | |
| 2566 goto check_extra_latin; | |
| 2567 | |
| 2568 | |
| 2569 case ISO_CODE_SS2: | |
| 2570 case ISO_CODE_SS3: | |
| 2571 /* Single shift. */ | |
| 2466 if (inhibit_iso_escape_detection) | 2572 if (inhibit_iso_escape_detection) |
| 2467 break; | 2573 break; |
| 2468 single_shifting = 0; | 2574 single_shifting = 1; |
| 2469 if (shift_out == 1) | 2575 rejected |= CATEGORY_MASK_ISO_7BIT; |
| 2470 { | 2576 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) |
| 2471 /* Locking shift in. */ | 2577 & CODING_ISO_FLAG_SINGLE_SHIFT) |
| 2472 mask_iso &= ~CATEGORY_MASK_ISO_7BIT; | 2578 found |= CATEGORY_MASK_ISO_8_1; |
| 2473 mask_found |= CATEGORY_MASK_ISO_ELSE; | 2579 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) |
| 2474 } | 2580 & CODING_ISO_FLAG_SINGLE_SHIFT) |
| 2475 break; | 2581 found |= CATEGORY_MASK_ISO_8_2; |
| 2476 | 2582 goto check_extra_latin; |
| 2477 case ISO_CODE_CSI: | |
| 2478 single_shifting = 0; | |
| 2479 case ISO_CODE_SS2: | |
| 2480 case ISO_CODE_SS3: | |
| 2481 { | |
| 2482 int newmask = CATEGORY_MASK_ISO_8_ELSE; | |
| 2483 | |
| 2484 mask_8bit_found = 1; | |
| 2485 if (inhibit_iso_escape_detection) | |
| 2486 break; | |
| 2487 if (c != ISO_CODE_CSI) | |
| 2488 { | |
| 2489 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | |
| 2490 & CODING_ISO_FLAG_SINGLE_SHIFT) | |
| 2491 newmask |= CATEGORY_MASK_ISO_8_1; | |
| 2492 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) | |
| 2493 & CODING_ISO_FLAG_SINGLE_SHIFT) | |
| 2494 newmask |= CATEGORY_MASK_ISO_8_2; | |
| 2495 single_shifting = 1; | |
| 2496 } | |
| 2497 if (VECTORP (Vlatin_extra_code_table) | |
| 2498 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | |
| 2499 { | |
| 2500 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | |
| 2501 & CODING_ISO_FLAG_LATIN_EXTRA) | |
| 2502 newmask |= CATEGORY_MASK_ISO_8_1; | |
| 2503 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) | |
| 2504 & CODING_ISO_FLAG_LATIN_EXTRA) | |
| 2505 newmask |= CATEGORY_MASK_ISO_8_2; | |
| 2506 } | |
| 2507 mask_iso &= newmask; | |
| 2508 mask_found |= newmask; | |
| 2509 } | |
| 2510 break; | |
| 2511 | 2583 |
| 2512 default: | 2584 default: |
| 2513 if (c < 0x80) | 2585 if (c < 0x80) |
| 2514 { | 2586 { |
| 2515 single_shifting = 0; | 2587 single_shifting = 0; |
| 2516 break; | 2588 break; |
| 2517 } | 2589 } |
| 2518 else if (c < 0xA0) | 2590 if (c >= 0xA0) |
| 2519 { | 2591 { |
| 2520 single_shifting = 0; | 2592 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE; |
| 2521 mask_8bit_found = 1; | 2593 found |= CATEGORY_MASK_ISO_8_1; |
| 2522 if (VECTORP (Vlatin_extra_code_table) | |
| 2523 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | |
| 2524 { | |
| 2525 int newmask = 0; | |
| 2526 | |
| 2527 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | |
| 2528 & CODING_ISO_FLAG_LATIN_EXTRA) | |
| 2529 newmask |= CATEGORY_MASK_ISO_8_1; | |
| 2530 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) | |
| 2531 & CODING_ISO_FLAG_LATIN_EXTRA) | |
| 2532 newmask |= CATEGORY_MASK_ISO_8_2; | |
| 2533 mask_iso &= newmask; | |
| 2534 mask_found |= newmask; | |
| 2535 } | |
| 2536 else | |
| 2537 return 0; | |
| 2538 } | |
| 2539 else | |
| 2540 { | |
| 2541 mask_iso &= ~(CATEGORY_MASK_ISO_7BIT | |
| 2542 | CATEGORY_MASK_ISO_7_ELSE); | |
| 2543 mask_found |= CATEGORY_MASK_ISO_8_1; | |
| 2544 mask_8bit_found = 1; | |
| 2545 /* Check the length of succeeding codes of the range | 2594 /* Check the length of succeeding codes of the range |
| 2546 0xA0..0xFF. If the byte length is odd, we exclude | 2595 0xA0..0xFF. If the byte length is even, we include |
| 2547 CATEGORY_MASK_ISO_8_2. We can check this only | 2596 CATEGORY_MASK_ISO_8_2 in `found'. We can check this |
| 2548 when we are not single shifting. */ | 2597 only when we are not single shifting. */ |
| 2549 if (!single_shifting | 2598 if (! single_shifting |
| 2550 && mask_iso & CATEGORY_MASK_ISO_8_2) | 2599 && ! (rejected & CATEGORY_MASK_ISO_8_2)) |
| 2551 { | 2600 { |
| 2552 int i = 1; | 2601 int i = 1; |
| 2553 while (src < src_end) | 2602 while (src < src_end) |
| 2554 { | 2603 { |
| 2555 ONE_MORE_BYTE (c); | 2604 ONE_MORE_BYTE (c); |
| 2557 break; | 2606 break; |
| 2558 i++; | 2607 i++; |
| 2559 } | 2608 } |
| 2560 | 2609 |
| 2561 if (i & 1 && src < src_end) | 2610 if (i & 1 && src < src_end) |
| 2562 mask_iso &= ~CATEGORY_MASK_ISO_8_2; | 2611 rejected |= CATEGORY_MASK_ISO_8_2; |
| 2563 else | 2612 else |
| 2564 mask_found |= CATEGORY_MASK_ISO_8_2; | 2613 found |= CATEGORY_MASK_ISO_8_2; |
| 2565 } | 2614 } |
| 2615 break; | |
| 2566 } | 2616 } |
| 2567 break; | 2617 check_extra_latin: |
| 2568 } | 2618 single_shifting = 0; |
| 2569 } | 2619 if (! VECTORP (Vlatin_extra_code_table) |
| 2620 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | |
| 2621 { | |
| 2622 rejected = CATEGORY_MASK_ISO; | |
| 2623 break; | |
| 2624 } | |
| 2625 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | |
| 2626 & CODING_ISO_FLAG_LATIN_EXTRA) | |
| 2627 found |= CATEGORY_MASK_ISO_8_1; | |
| 2628 else | |
| 2629 rejected |= CATEGORY_MASK_ISO_8_1; | |
| 2630 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) | |
| 2631 & CODING_ISO_FLAG_LATIN_EXTRA) | |
| 2632 found |= CATEGORY_MASK_ISO_8_2; | |
| 2633 else | |
| 2634 rejected |= CATEGORY_MASK_ISO_8_2; | |
| 2635 } | |
| 2636 } | |
| 2637 detect_info->rejected |= CATEGORY_MASK_ISO; | |
| 2638 return 0; | |
| 2639 | |
| 2570 no_more_source: | 2640 no_more_source: |
| 2571 if (!mask_iso) | 2641 detect_info->rejected |= rejected; |
| 2572 { | 2642 detect_info->found |= (found & ~rejected); |
| 2573 *mask &= ~CATEGORY_MASK_ISO; | |
| 2574 return 0; | |
| 2575 } | |
| 2576 if (!mask_found) | |
| 2577 return 0; | |
| 2578 *mask &= ~CATEGORY_MASK_ISO; | |
| 2579 *mask |= mask_iso & mask_found; | |
| 2580 if (! mask_8bit_found) | |
| 2581 *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE); | |
| 2582 return 1; | 2643 return 1; |
| 2583 } | 2644 } |
| 2584 | 2645 |
| 2585 | 2646 |
| 2586 /* Set designation state into CODING. */ | 2647 /* Set designation state into CODING. */ |
| 2692 int nchars = (component_len > 0 ? component_idx - component_len \ | 2753 int nchars = (component_len > 0 ? component_idx - component_len \ |
| 2693 : method == COMPOSITION_RELATIVE ? component_idx \ | 2754 : method == COMPOSITION_RELATIVE ? component_idx \ |
| 2694 : (component_idx + 1) / 2); \ | 2755 : (component_idx + 1) / 2); \ |
| 2695 int i; \ | 2756 int i; \ |
| 2696 int *saved_charbuf = charbuf; \ | 2757 int *saved_charbuf = charbuf; \ |
| 2758 int from = coding->produced_char + char_offset; \ | |
| 2759 int to = from + nchars; \ | |
| 2697 \ | 2760 \ |
| 2698 ADD_COMPOSITION_DATA (charbuf, method, nchars); \ | 2761 ADD_COMPOSITION_DATA (charbuf, from, to, method); \ |
| 2699 if (method != COMPOSITION_RELATIVE) \ | 2762 if (method != COMPOSITION_RELATIVE) \ |
| 2700 { \ | 2763 { \ |
| 2701 if (component_len == 0) \ | 2764 if (component_len == 0) \ |
| 2702 for (i = 0; i < component_idx; i++) \ | 2765 for (i = 0; i < component_idx; i++) \ |
| 2703 *charbuf++ = components[i]; \ | 2766 *charbuf++ = components[i]; \ |
| 2750 { | 2813 { |
| 2751 unsigned char *src = coding->source + coding->consumed; | 2814 unsigned char *src = coding->source + coding->consumed; |
| 2752 unsigned char *src_end = coding->source + coding->src_bytes; | 2815 unsigned char *src_end = coding->source + coding->src_bytes; |
| 2753 unsigned char *src_base; | 2816 unsigned char *src_base; |
| 2754 int *charbuf = coding->charbuf; | 2817 int *charbuf = coding->charbuf; |
| 2755 int *charbuf_end = charbuf + coding->charbuf_size - 4; | 2818 int *charbuf_end |
| 2819 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH; | |
| 2756 int consumed_chars = 0, consumed_chars_base; | 2820 int consumed_chars = 0, consumed_chars_base; |
| 2757 int char_offset = 0; | |
| 2758 int multibytep = coding->src_multibyte; | 2821 int multibytep = coding->src_multibyte; |
| 2759 /* Charsets invoked to graphic plane 0 and 1 respectively. */ | 2822 /* Charsets invoked to graphic plane 0 and 1 respectively. */ |
| 2760 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); | 2823 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); |
| 2761 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); | 2824 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); |
| 2762 struct charset *charset; | 2825 struct charset *charset; |
| 2772 enum composition_method method; | 2835 enum composition_method method; |
| 2773 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; | 2836 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; |
| 2774 int component_idx; | 2837 int component_idx; |
| 2775 int component_len; | 2838 int component_len; |
| 2776 Lisp_Object attrs, eol_type, charset_list; | 2839 Lisp_Object attrs, eol_type, charset_list; |
| 2840 int char_offset = coding->produced_char; | |
| 2841 int last_offset = char_offset; | |
| 2842 int last_id = charset_ascii; | |
| 2777 | 2843 |
| 2778 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 2844 CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 2779 setup_iso_safe_charsets (attrs); | 2845 setup_iso_safe_charsets (attrs); |
| 2780 | 2846 |
| 2781 while (1) | 2847 while (1) |
| 3049 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); | 3115 charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); |
| 3050 continue; | 3116 continue; |
| 3051 } | 3117 } |
| 3052 } | 3118 } |
| 3053 | 3119 |
| 3120 if (charset->id != charset_ascii | |
| 3121 && last_id != charset->id) | |
| 3122 { | |
| 3123 if (last_id != charset_ascii) | |
| 3124 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 3125 last_id = charset->id; | |
| 3126 last_offset = char_offset; | |
| 3127 } | |
| 3128 | |
| 3054 /* Now we know CHARSET and 1st position code C1 of a character. | 3129 /* Now we know CHARSET and 1st position code C1 of a character. |
| 3055 Produce a decoded character while getting 2nd position code | 3130 Produce a decoded character while getting 2nd position code |
| 3056 C2 if necessary. */ | 3131 C2 if necessary. */ |
| 3057 c1 &= 0x7F; | 3132 c1 &= 0x7F; |
| 3058 if (CHARSET_DIMENSION (charset) > 1) | 3133 if (CHARSET_DIMENSION (charset) > 1) |
| 3080 { | 3155 { |
| 3081 if (ASCII_BYTE_P (*src_base)) | 3156 if (ASCII_BYTE_P (*src_base)) |
| 3082 *charbuf++ = *src_base; | 3157 *charbuf++ = *src_base; |
| 3083 else | 3158 else |
| 3084 *charbuf++ = BYTE8_TO_CHAR (*src_base); | 3159 *charbuf++ = BYTE8_TO_CHAR (*src_base); |
| 3160 char_offset++; | |
| 3085 } | 3161 } |
| 3086 } | 3162 } |
| 3087 else if (composition_state == COMPOSING_NO) | 3163 else if (composition_state == COMPOSING_NO) |
| 3088 { | 3164 { |
| 3089 *charbuf++ = c; | 3165 *charbuf++ = c; |
| 3103 MAYBE_FINISH_COMPOSITION (); | 3179 MAYBE_FINISH_COMPOSITION (); |
| 3104 src = src_base; | 3180 src = src_base; |
| 3105 consumed_chars = consumed_chars_base; | 3181 consumed_chars = consumed_chars_base; |
| 3106 ONE_MORE_BYTE (c); | 3182 ONE_MORE_BYTE (c); |
| 3107 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 3183 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 3184 char_offset++; | |
| 3108 coding->errors++; | 3185 coding->errors++; |
| 3109 } | 3186 } |
| 3110 | 3187 |
| 3111 no_more_source: | 3188 no_more_source: |
| 3189 if (last_id != charset_ascii) | |
| 3190 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 3112 coding->consumed_char += consumed_chars_base; | 3191 coding->consumed_char += consumed_chars_base; |
| 3113 coding->consumed = src_base - coding->source; | 3192 coding->consumed = src_base - coding->source; |
| 3114 coding->charbuf_used = charbuf - coding->charbuf; | 3193 coding->charbuf_used = charbuf - coding->charbuf; |
| 3115 } | 3194 } |
| 3116 | 3195 |
| 3528 && CODING_ISO_BOL (coding)); | 3607 && CODING_ISO_BOL (coding)); |
| 3529 int produced_chars = 0; | 3608 int produced_chars = 0; |
| 3530 Lisp_Object attrs, eol_type, charset_list; | 3609 Lisp_Object attrs, eol_type, charset_list; |
| 3531 int ascii_compatible; | 3610 int ascii_compatible; |
| 3532 int c; | 3611 int c; |
| 3612 int preferred_charset_id = -1; | |
| 3533 | 3613 |
| 3534 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 3614 CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 3535 setup_iso_safe_charsets (attrs); | 3615 setup_iso_safe_charsets (attrs); |
| 3616 /* Charset list may have been changed. */ | |
| 3617 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ | |
| 3536 coding->safe_charsets | 3618 coding->safe_charsets |
| 3537 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data; | 3619 = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data; |
| 3538 | 3620 |
| 3539 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); | 3621 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); |
| 3540 | 3622 |
| 3552 /* We are sure that designation sequences are all ASCII bytes. */ | 3634 /* We are sure that designation sequences are all ASCII bytes. */ |
| 3553 produced_chars += dst - dst_prev; | 3635 produced_chars += dst - dst_prev; |
| 3554 } | 3636 } |
| 3555 | 3637 |
| 3556 c = *charbuf++; | 3638 c = *charbuf++; |
| 3639 | |
| 3640 if (c < 0) | |
| 3641 { | |
| 3642 /* Handle an annotation. */ | |
| 3643 switch (*charbuf) | |
| 3644 { | |
| 3645 case CODING_ANNOTATE_COMPOSITION_MASK: | |
| 3646 /* Not yet implemented. */ | |
| 3647 break; | |
| 3648 case CODING_ANNOTATE_CHARSET_MASK: | |
| 3649 preferred_charset_id = charbuf[3]; | |
| 3650 if (preferred_charset_id >= 0 | |
| 3651 && NILP (Fmemq (make_number (preferred_charset_id), | |
| 3652 charset_list))) | |
| 3653 preferred_charset_id = -1; | |
| 3654 break; | |
| 3655 default: | |
| 3656 abort (); | |
| 3657 } | |
| 3658 charbuf += -c - 1; | |
| 3659 continue; | |
| 3660 } | |
| 3557 | 3661 |
| 3558 /* Now encode the character C. */ | 3662 /* Now encode the character C. */ |
| 3559 if (c < 0x20 || c == 0x7F) | 3663 if (c < 0x20 || c == 0x7F) |
| 3560 { | 3664 { |
| 3561 if (c == '\n' | 3665 if (c == '\n' |
| 3593 c = CHAR_TO_BYTE8 (c); | 3697 c = CHAR_TO_BYTE8 (c); |
| 3594 EMIT_ONE_BYTE (c); | 3698 EMIT_ONE_BYTE (c); |
| 3595 } | 3699 } |
| 3596 else | 3700 else |
| 3597 { | 3701 { |
| 3598 struct charset *charset = char_charset (c, charset_list, NULL); | 3702 struct charset *charset; |
| 3599 | 3703 |
| 3704 if (preferred_charset_id >= 0) | |
| 3705 { | |
| 3706 charset = CHARSET_FROM_ID (preferred_charset_id); | |
| 3707 if (! CHAR_CHARSET_P (c, charset)) | |
| 3708 charset = char_charset (c, charset_list, NULL); | |
| 3709 } | |
| 3710 else | |
| 3711 charset = char_charset (c, charset_list, NULL); | |
| 3600 if (!charset) | 3712 if (!charset) |
| 3601 { | 3713 { |
| 3602 if (coding->mode & CODING_MODE_SAFE_ENCODING) | 3714 if (coding->mode & CODING_MODE_SAFE_ENCODING) |
| 3603 { | 3715 { |
| 3604 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION; | 3716 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION; |
| 3667 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 3779 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 3668 Check if a text is encoded in SJIS. If it is, return | 3780 Check if a text is encoded in SJIS. If it is, return |
| 3669 CATEGORY_MASK_SJIS, else return 0. */ | 3781 CATEGORY_MASK_SJIS, else return 0. */ |
| 3670 | 3782 |
| 3671 static int | 3783 static int |
| 3672 detect_coding_sjis (coding, mask) | 3784 detect_coding_sjis (coding, detect_info) |
| 3673 struct coding_system *coding; | 3785 struct coding_system *coding; |
| 3674 int *mask; | 3786 struct coding_detection_info *detect_info; |
| 3675 { | 3787 { |
| 3676 unsigned char *src = coding->source, *src_base = src; | 3788 unsigned char *src = coding->source, *src_base = src; |
| 3677 unsigned char *src_end = coding->source + coding->src_bytes; | 3789 unsigned char *src_end = coding->source + coding->src_bytes; |
| 3678 int multibytep = coding->src_multibyte; | 3790 int multibytep = coding->src_multibyte; |
| 3679 int consumed_chars = 0; | 3791 int consumed_chars = 0; |
| 3680 int found = 0; | 3792 int found = 0; |
| 3681 int c; | 3793 int c; |
| 3682 int incomplete; | 3794 int incomplete; |
| 3683 | 3795 |
| 3796 detect_info->checked |= CATEGORY_MASK_SJIS; | |
| 3684 /* A coding system of this category is always ASCII compatible. */ | 3797 /* A coding system of this category is always ASCII compatible. */ |
| 3685 src += coding->head_ascii; | 3798 src += coding->head_ascii; |
| 3686 | 3799 |
| 3687 while (1) | 3800 while (1) |
| 3688 { | 3801 { |
| 3694 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) | 3807 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) |
| 3695 { | 3808 { |
| 3696 ONE_MORE_BYTE (c); | 3809 ONE_MORE_BYTE (c); |
| 3697 if (c < 0x40 || c == 0x7F || c > 0xFC) | 3810 if (c < 0x40 || c == 0x7F || c > 0xFC) |
| 3698 break; | 3811 break; |
| 3699 found = 1; | 3812 found = CATEGORY_MASK_SJIS; |
| 3700 } | 3813 } |
| 3701 else if (c >= 0xA0 && c < 0xE0) | 3814 else if (c >= 0xA0 && c < 0xE0) |
| 3702 found = 1; | 3815 found = CATEGORY_MASK_SJIS; |
| 3703 else | 3816 else |
| 3704 break; | 3817 break; |
| 3705 } | 3818 } |
| 3706 *mask &= ~CATEGORY_MASK_SJIS; | 3819 detect_info->rejected |= CATEGORY_MASK_SJIS; |
| 3707 return 0; | 3820 return 0; |
| 3708 | 3821 |
| 3709 no_more_source: | 3822 no_more_source: |
| 3710 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 3823 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) |
| 3711 { | 3824 { |
| 3712 *mask &= ~CATEGORY_MASK_SJIS; | 3825 detect_info->rejected |= CATEGORY_MASK_SJIS; |
| 3713 return 0; | 3826 return 0; |
| 3714 } | 3827 } |
| 3715 return found; | 3828 detect_info->found |= found; |
| 3829 return 1; | |
| 3716 } | 3830 } |
| 3717 | 3831 |
| 3718 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 3832 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 3719 Check if a text is encoded in BIG5. If it is, return | 3833 Check if a text is encoded in BIG5. If it is, return |
| 3720 CATEGORY_MASK_BIG5, else return 0. */ | 3834 CATEGORY_MASK_BIG5, else return 0. */ |
| 3721 | 3835 |
| 3722 static int | 3836 static int |
| 3723 detect_coding_big5 (coding, mask) | 3837 detect_coding_big5 (coding, detect_info) |
| 3724 struct coding_system *coding; | 3838 struct coding_system *coding; |
| 3725 int *mask; | 3839 struct coding_detection_info *detect_info; |
| 3726 { | 3840 { |
| 3727 unsigned char *src = coding->source, *src_base = src; | 3841 unsigned char *src = coding->source, *src_base = src; |
| 3728 unsigned char *src_end = coding->source + coding->src_bytes; | 3842 unsigned char *src_end = coding->source + coding->src_bytes; |
| 3729 int multibytep = coding->src_multibyte; | 3843 int multibytep = coding->src_multibyte; |
| 3730 int consumed_chars = 0; | 3844 int consumed_chars = 0; |
| 3731 int found = 0; | 3845 int found = 0; |
| 3732 int c; | 3846 int c; |
| 3733 int incomplete; | 3847 int incomplete; |
| 3734 | 3848 |
| 3849 detect_info->checked |= CATEGORY_MASK_BIG5; | |
| 3735 /* A coding system of this category is always ASCII compatible. */ | 3850 /* A coding system of this category is always ASCII compatible. */ |
| 3736 src += coding->head_ascii; | 3851 src += coding->head_ascii; |
| 3737 | 3852 |
| 3738 while (1) | 3853 while (1) |
| 3739 { | 3854 { |
| 3745 if (c >= 0xA1) | 3860 if (c >= 0xA1) |
| 3746 { | 3861 { |
| 3747 ONE_MORE_BYTE (c); | 3862 ONE_MORE_BYTE (c); |
| 3748 if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) | 3863 if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) |
| 3749 return 0; | 3864 return 0; |
| 3750 found = 1; | 3865 found = CATEGORY_MASK_BIG5; |
| 3751 } | 3866 } |
| 3752 else | 3867 else |
| 3753 break; | 3868 break; |
| 3754 } | 3869 } |
| 3755 *mask &= ~CATEGORY_MASK_BIG5; | 3870 detect_info->rejected |= CATEGORY_MASK_BIG5; |
| 3756 return 0; | 3871 return 0; |
| 3757 | 3872 |
| 3758 no_more_source: | 3873 no_more_source: |
| 3759 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 3874 if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) |
| 3760 { | 3875 { |
| 3761 *mask &= ~CATEGORY_MASK_BIG5; | 3876 detect_info->rejected |= CATEGORY_MASK_BIG5; |
| 3762 return 0; | 3877 return 0; |
| 3763 } | 3878 } |
| 3764 return found; | 3879 detect_info->found |= found; |
| 3880 return 1; | |
| 3765 } | 3881 } |
| 3766 | 3882 |
| 3767 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". | 3883 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". |
| 3768 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */ | 3884 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */ |
| 3769 | 3885 |
| 3773 { | 3889 { |
| 3774 unsigned char *src = coding->source + coding->consumed; | 3890 unsigned char *src = coding->source + coding->consumed; |
| 3775 unsigned char *src_end = coding->source + coding->src_bytes; | 3891 unsigned char *src_end = coding->source + coding->src_bytes; |
| 3776 unsigned char *src_base; | 3892 unsigned char *src_base; |
| 3777 int *charbuf = coding->charbuf; | 3893 int *charbuf = coding->charbuf; |
| 3778 int *charbuf_end = charbuf + coding->charbuf_size; | 3894 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
| 3779 int consumed_chars = 0, consumed_chars_base; | 3895 int consumed_chars = 0, consumed_chars_base; |
| 3780 int multibytep = coding->src_multibyte; | 3896 int multibytep = coding->src_multibyte; |
| 3781 struct charset *charset_roman, *charset_kanji, *charset_kana; | 3897 struct charset *charset_roman, *charset_kanji, *charset_kana; |
| 3782 Lisp_Object attrs, eol_type, charset_list, val; | 3898 Lisp_Object attrs, eol_type, charset_list, val; |
| 3899 int char_offset = coding->produced_char; | |
| 3900 int last_offset = char_offset; | |
| 3901 int last_id = charset_ascii; | |
| 3783 | 3902 |
| 3784 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 3903 CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 3785 | 3904 |
| 3786 val = charset_list; | 3905 val = charset_list; |
| 3787 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 3906 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 3840 /* SJIS -> JISX0201-Kana */ | 3959 /* SJIS -> JISX0201-Kana */ |
| 3841 c &= 0x7F; | 3960 c &= 0x7F; |
| 3842 charset = charset_kana; | 3961 charset = charset_kana; |
| 3843 } | 3962 } |
| 3844 } | 3963 } |
| 3964 if (charset->id != charset_ascii | |
| 3965 && last_id != charset->id) | |
| 3966 { | |
| 3967 if (last_id != charset_ascii) | |
| 3968 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 3969 last_id = charset->id; | |
| 3970 last_offset = char_offset; | |
| 3971 } | |
| 3845 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | 3972 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); |
| 3846 } | 3973 } |
| 3847 *charbuf++ = c; | 3974 *charbuf++ = c; |
| 3975 char_offset++; | |
| 3848 continue; | 3976 continue; |
| 3849 | 3977 |
| 3850 invalid_code: | 3978 invalid_code: |
| 3851 src = src_base; | 3979 src = src_base; |
| 3852 consumed_chars = consumed_chars_base; | 3980 consumed_chars = consumed_chars_base; |
| 3853 ONE_MORE_BYTE (c); | 3981 ONE_MORE_BYTE (c); |
| 3854 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 3982 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 3983 char_offset++; | |
| 3855 coding->errors++; | 3984 coding->errors++; |
| 3856 } | 3985 } |
| 3857 | 3986 |
| 3858 no_more_source: | 3987 no_more_source: |
| 3988 if (last_id != charset_ascii) | |
| 3989 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 3859 coding->consumed_char += consumed_chars_base; | 3990 coding->consumed_char += consumed_chars_base; |
| 3860 coding->consumed = src_base - coding->source; | 3991 coding->consumed = src_base - coding->source; |
| 3861 coding->charbuf_used = charbuf - coding->charbuf; | 3992 coding->charbuf_used = charbuf - coding->charbuf; |
| 3862 } | 3993 } |
| 3863 | 3994 |
| 3867 { | 3998 { |
| 3868 unsigned char *src = coding->source + coding->consumed; | 3999 unsigned char *src = coding->source + coding->consumed; |
| 3869 unsigned char *src_end = coding->source + coding->src_bytes; | 4000 unsigned char *src_end = coding->source + coding->src_bytes; |
| 3870 unsigned char *src_base; | 4001 unsigned char *src_base; |
| 3871 int *charbuf = coding->charbuf; | 4002 int *charbuf = coding->charbuf; |
| 3872 int *charbuf_end = charbuf + coding->charbuf_size; | 4003 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
| 3873 int consumed_chars = 0, consumed_chars_base; | 4004 int consumed_chars = 0, consumed_chars_base; |
| 3874 int multibytep = coding->src_multibyte; | 4005 int multibytep = coding->src_multibyte; |
| 3875 struct charset *charset_roman, *charset_big5; | 4006 struct charset *charset_roman, *charset_big5; |
| 3876 Lisp_Object attrs, eol_type, charset_list, val; | 4007 Lisp_Object attrs, eol_type, charset_list, val; |
| 4008 int char_offset = coding->produced_char; | |
| 4009 int last_offset = char_offset; | |
| 4010 int last_id = charset_ascii; | |
| 3877 | 4011 |
| 3878 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4012 CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 3879 val = charset_list; | 4013 val = charset_list; |
| 3880 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4014 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 3881 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 4015 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
| 3921 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE) | 4055 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE) |
| 3922 goto invalid_code; | 4056 goto invalid_code; |
| 3923 c = c << 8 | c1; | 4057 c = c << 8 | c1; |
| 3924 charset = charset_big5; | 4058 charset = charset_big5; |
| 3925 } | 4059 } |
| 4060 if (charset->id != charset_ascii | |
| 4061 && last_id != charset->id) | |
| 4062 { | |
| 4063 if (last_id != charset_ascii) | |
| 4064 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 4065 last_id = charset->id; | |
| 4066 last_offset = char_offset; | |
| 4067 } | |
| 3926 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | 4068 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); |
| 3927 } | 4069 } |
| 3928 | 4070 |
| 3929 *charbuf++ = c; | 4071 *charbuf++ = c; |
| 4072 char_offset++; | |
| 3930 continue; | 4073 continue; |
| 3931 | 4074 |
| 3932 invalid_code: | 4075 invalid_code: |
| 3933 src = src_base; | 4076 src = src_base; |
| 3934 consumed_chars = consumed_chars_base; | 4077 consumed_chars = consumed_chars_base; |
| 3935 ONE_MORE_BYTE (c); | 4078 ONE_MORE_BYTE (c); |
| 3936 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 4079 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 4080 char_offset++; | |
| 3937 coding->errors++; | 4081 coding->errors++; |
| 3938 } | 4082 } |
| 3939 | 4083 |
| 3940 no_more_source: | 4084 no_more_source: |
| 4085 if (last_id != charset_ascii) | |
| 4086 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 3941 coding->consumed_char += consumed_chars_base; | 4087 coding->consumed_char += consumed_chars_base; |
| 3942 coding->consumed = src_base - coding->source; | 4088 coding->consumed = src_base - coding->source; |
| 3943 coding->charbuf_used = charbuf - coding->charbuf; | 4089 coding->charbuf_used = charbuf - coding->charbuf; |
| 3944 } | 4090 } |
| 3945 | 4091 |
| 4104 Check if a text is encoded in a coding system of which | 4250 Check if a text is encoded in a coding system of which |
| 4105 encoder/decoder are written in CCL program. If it is, return | 4251 encoder/decoder are written in CCL program. If it is, return |
| 4106 CATEGORY_MASK_CCL, else return 0. */ | 4252 CATEGORY_MASK_CCL, else return 0. */ |
| 4107 | 4253 |
| 4108 static int | 4254 static int |
| 4109 detect_coding_ccl (coding, mask) | 4255 detect_coding_ccl (coding, detect_info) |
| 4110 struct coding_system *coding; | 4256 struct coding_system *coding; |
| 4111 int *mask; | 4257 struct coding_detection_info *detect_info; |
| 4112 { | 4258 { |
| 4113 unsigned char *src = coding->source, *src_base = src; | 4259 unsigned char *src = coding->source, *src_base = src; |
| 4114 unsigned char *src_end = coding->source + coding->src_bytes; | 4260 unsigned char *src_end = coding->source + coding->src_bytes; |
| 4115 int multibytep = coding->src_multibyte; | 4261 int multibytep = coding->src_multibyte; |
| 4116 int consumed_chars = 0; | 4262 int consumed_chars = 0; |
| 4117 int found = 0; | 4263 int found = 0; |
| 4118 unsigned char *valids = CODING_CCL_VALIDS (coding); | 4264 unsigned char *valids = CODING_CCL_VALIDS (coding); |
| 4119 int head_ascii = coding->head_ascii; | 4265 int head_ascii = coding->head_ascii; |
| 4120 Lisp_Object attrs; | 4266 Lisp_Object attrs; |
| 4121 | 4267 |
| 4268 detect_info->checked |= CATEGORY_MASK_CCL; | |
| 4269 | |
| 4122 coding = &coding_categories[coding_category_ccl]; | 4270 coding = &coding_categories[coding_category_ccl]; |
| 4123 attrs = CODING_ID_ATTRS (coding->id); | 4271 attrs = CODING_ID_ATTRS (coding->id); |
| 4124 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) | 4272 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) |
| 4125 src += head_ascii; | 4273 src += head_ascii; |
| 4126 | 4274 |
| 4128 { | 4276 { |
| 4129 int c; | 4277 int c; |
| 4130 ONE_MORE_BYTE (c); | 4278 ONE_MORE_BYTE (c); |
| 4131 if (! valids[c]) | 4279 if (! valids[c]) |
| 4132 break; | 4280 break; |
| 4133 if (!found && valids[c] > 1) | 4281 if ((valids[c] > 1)) |
| 4134 found = 1; | 4282 found = CATEGORY_MASK_CCL; |
| 4135 } | 4283 } |
| 4136 *mask &= ~CATEGORY_MASK_CCL; | 4284 detect_info->rejected |= CATEGORY_MASK_CCL; |
| 4137 return 0; | 4285 return 0; |
| 4138 | 4286 |
| 4139 no_more_source: | 4287 no_more_source: |
| 4140 return found; | 4288 detect_info->found |= found; |
| 4289 return 1; | |
| 4141 } | 4290 } |
| 4142 | 4291 |
| 4143 static void | 4292 static void |
| 4144 decode_coding_ccl (coding) | 4293 decode_coding_ccl (coding) |
| 4145 struct coding_system *coding; | 4294 struct coding_system *coding; |
| 4373 coding->produced_char += produced_chars; | 4522 coding->produced_char += produced_chars; |
| 4374 coding->produced = dst - coding->destination; | 4523 coding->produced = dst - coding->destination; |
| 4375 return 0; | 4524 return 0; |
| 4376 } | 4525 } |
| 4377 | 4526 |
| 4527 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | |
| 4528 Check if a text is encoded in a charset-based coding system. If it | |
| 4529 is, return 1, else return 0. */ | |
| 4530 | |
| 4378 static int | 4531 static int |
| 4379 detect_coding_charset (coding, mask) | 4532 detect_coding_charset (coding, detect_info) |
| 4380 struct coding_system *coding; | 4533 struct coding_system *coding; |
| 4381 int *mask; | 4534 struct coding_detection_info *detect_info; |
| 4382 { | 4535 { |
| 4383 unsigned char *src = coding->source, *src_base = src; | 4536 unsigned char *src = coding->source, *src_base = src; |
| 4384 unsigned char *src_end = coding->source + coding->src_bytes; | 4537 unsigned char *src_end = coding->source + coding->src_bytes; |
| 4385 int multibytep = coding->src_multibyte; | 4538 int multibytep = coding->src_multibyte; |
| 4386 int consumed_chars = 0; | 4539 int consumed_chars = 0; |
| 4387 Lisp_Object attrs, valids; | 4540 Lisp_Object attrs, valids; |
| 4388 int found = 0; | 4541 int found = 0; |
| 4389 | 4542 |
| 4543 detect_info->checked |= CATEGORY_MASK_CHARSET; | |
| 4544 | |
| 4390 coding = &coding_categories[coding_category_charset]; | 4545 coding = &coding_categories[coding_category_charset]; |
| 4391 attrs = CODING_ID_ATTRS (coding->id); | 4546 attrs = CODING_ID_ATTRS (coding->id); |
| 4392 valids = AREF (attrs, coding_attr_charset_valids); | 4547 valids = AREF (attrs, coding_attr_charset_valids); |
| 4393 | 4548 |
| 4394 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) | 4549 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) |
| 4400 | 4555 |
| 4401 ONE_MORE_BYTE (c); | 4556 ONE_MORE_BYTE (c); |
| 4402 if (NILP (AREF (valids, c))) | 4557 if (NILP (AREF (valids, c))) |
| 4403 break; | 4558 break; |
| 4404 if (c >= 0x80) | 4559 if (c >= 0x80) |
| 4405 found = 1; | 4560 found = CATEGORY_MASK_CHARSET; |
| 4406 } | 4561 } |
| 4407 *mask &= ~CATEGORY_MASK_CHARSET; | 4562 detect_info->rejected |= CATEGORY_MASK_CHARSET; |
| 4408 return 0; | 4563 return 0; |
| 4409 | 4564 |
| 4410 no_more_source: | 4565 no_more_source: |
| 4411 return (found || NILP (CODING_ATTR_ASCII_COMPAT (attrs))); | 4566 detect_info->found |= found; |
| 4567 return 1; | |
| 4412 } | 4568 } |
| 4413 | 4569 |
| 4414 static void | 4570 static void |
| 4415 decode_coding_charset (coding) | 4571 decode_coding_charset (coding) |
| 4416 struct coding_system *coding; | 4572 struct coding_system *coding; |
| 4417 { | 4573 { |
| 4418 unsigned char *src = coding->source + coding->consumed; | 4574 unsigned char *src = coding->source + coding->consumed; |
| 4419 unsigned char *src_end = coding->source + coding->src_bytes; | 4575 unsigned char *src_end = coding->source + coding->src_bytes; |
| 4420 unsigned char *src_base; | 4576 unsigned char *src_base; |
| 4421 int *charbuf = coding->charbuf; | 4577 int *charbuf = coding->charbuf; |
| 4422 int *charbuf_end = charbuf + coding->charbuf_size; | 4578 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
| 4423 int consumed_chars = 0, consumed_chars_base; | 4579 int consumed_chars = 0, consumed_chars_base; |
| 4424 int multibytep = coding->src_multibyte; | 4580 int multibytep = coding->src_multibyte; |
| 4425 Lisp_Object attrs, eol_type, charset_list, valids; | 4581 Lisp_Object attrs, eol_type, charset_list, valids; |
| 4582 int char_offset = coding->produced_char; | |
| 4583 int last_offset = char_offset; | |
| 4584 int last_id = charset_ascii; | |
| 4426 | 4585 |
| 4427 CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4586 CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 4428 valids = AREF (attrs, coding_attr_charset_valids); | 4587 valids = AREF (attrs, coding_attr_charset_valids); |
| 4429 | 4588 |
| 4430 while (1) | 4589 while (1) |
| 4501 val = XCDR (val); | 4660 val = XCDR (val); |
| 4502 } | 4661 } |
| 4503 } | 4662 } |
| 4504 if (c < 0) | 4663 if (c < 0) |
| 4505 goto invalid_code; | 4664 goto invalid_code; |
| 4665 if (charset->id != charset_ascii | |
| 4666 && last_id != charset->id) | |
| 4667 { | |
| 4668 if (last_id != charset_ascii) | |
| 4669 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 4670 last_id = charset->id; | |
| 4671 last_offset = char_offset; | |
| 4672 } | |
| 4506 } | 4673 } |
| 4507 *charbuf++ = c; | 4674 *charbuf++ = c; |
| 4675 char_offset++; | |
| 4508 continue; | 4676 continue; |
| 4509 | 4677 |
| 4510 invalid_code: | 4678 invalid_code: |
| 4511 src = src_base; | 4679 src = src_base; |
| 4512 consumed_chars = consumed_chars_base; | 4680 consumed_chars = consumed_chars_base; |
| 4513 ONE_MORE_BYTE (c); | 4681 ONE_MORE_BYTE (c); |
| 4514 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 4682 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 4683 char_offset++; | |
| 4515 coding->errors++; | 4684 coding->errors++; |
| 4516 } | 4685 } |
| 4517 | 4686 |
| 4518 no_more_source: | 4687 no_more_source: |
| 4688 if (last_id != charset_ascii) | |
| 4689 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | |
| 4519 coding->consumed_char += consumed_chars_base; | 4690 coding->consumed_char += consumed_chars_base; |
| 4520 coding->consumed = src_base - coding->source; | 4691 coding->consumed = src_base - coding->source; |
| 4521 coding->charbuf_used = charbuf - coding->charbuf; | 4692 coding->charbuf_used = charbuf - coding->charbuf; |
| 4522 } | 4693 } |
| 4523 | 4694 |
| 4630 } | 4801 } |
| 4631 else if (EQ (coding_type, Qiso_2022)) | 4802 else if (EQ (coding_type, Qiso_2022)) |
| 4632 { | 4803 { |
| 4633 int i; | 4804 int i; |
| 4634 int flags = XINT (AREF (attrs, coding_attr_iso_flags)); | 4805 int flags = XINT (AREF (attrs, coding_attr_iso_flags)); |
| 4806 enum coding_category category = XINT (CODING_ATTR_CATEGORY (attrs)); | |
| 4635 | 4807 |
| 4636 /* Invoke graphic register 0 to plane 0. */ | 4808 /* Invoke graphic register 0 to plane 0. */ |
| 4637 CODING_ISO_INVOCATION (coding, 0) = 0; | 4809 CODING_ISO_INVOCATION (coding, 0) = 0; |
| 4638 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */ | 4810 /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */ |
| 4639 CODING_ISO_INVOCATION (coding, 1) | 4811 CODING_ISO_INVOCATION (coding, 1) |
| 4653 coding->common_flags | 4825 coding->common_flags |
| 4654 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK | 4826 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK |
| 4655 | CODING_REQUIRE_FLUSHING_MASK); | 4827 | CODING_REQUIRE_FLUSHING_MASK); |
| 4656 if (flags & CODING_ISO_FLAG_COMPOSITION) | 4828 if (flags & CODING_ISO_FLAG_COMPOSITION) |
| 4657 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK; | 4829 coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK; |
| 4830 if (flags & CODING_ISO_FLAG_DESIGNATION) | |
| 4831 coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK; | |
| 4658 if (flags & CODING_ISO_FLAG_FULL_SUPPORT) | 4832 if (flags & CODING_ISO_FLAG_FULL_SUPPORT) |
| 4659 { | 4833 { |
| 4660 setup_iso_safe_charsets (attrs); | 4834 setup_iso_safe_charsets (attrs); |
| 4661 val = CODING_ATTR_SAFE_CHARSETS (attrs); | 4835 val = CODING_ATTR_SAFE_CHARSETS (attrs); |
| 4662 coding->max_charset_id = XSTRING (val)->size - 1; | 4836 coding->max_charset_id = XSTRING (val)->size - 1; |
| 4928 #define EOL_SEEN_NONE 0 | 5102 #define EOL_SEEN_NONE 0 |
| 4929 #define EOL_SEEN_LF 1 | 5103 #define EOL_SEEN_LF 1 |
| 4930 #define EOL_SEEN_CR 2 | 5104 #define EOL_SEEN_CR 2 |
| 4931 #define EOL_SEEN_CRLF 4 | 5105 #define EOL_SEEN_CRLF 4 |
| 4932 | 5106 |
| 4933 /* Detect how end-of-line of a text of length CODING->src_bytes | 5107 /* Detect how end-of-line of a text of length SRC_BYTES pointed by |
| 4934 pointed by CODING->source is encoded. Return one of | 5108 SOURCE is encoded. If CATEGORY is one of |
| 4935 EOL_SEEN_XXX. */ | 5109 coding_category_utf_16_XXXX, assume that CR and LF are encoded by |
| 5110 two-byte, else they are encoded by one-byte. | |
| 5111 | |
| 5112 Return one of EOL_SEEN_XXX. */ | |
| 4936 | 5113 |
| 4937 #define MAX_EOL_CHECK_COUNT 3 | 5114 #define MAX_EOL_CHECK_COUNT 3 |
| 4938 | 5115 |
| 4939 static int | 5116 static int |
| 4940 detect_eol (source, src_bytes, category) | 5117 detect_eol (source, src_bytes, category) |
| 5055 | 5232 |
| 5056 /* If we have not yet decided the text encoding type, detect it | 5233 /* If we have not yet decided the text encoding type, detect it |
| 5057 now. */ | 5234 now. */ |
| 5058 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 5235 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
| 5059 { | 5236 { |
| 5060 int mask = CATEGORY_MASK_ANY; | |
| 5061 int c, i; | 5237 int c, i; |
| 5062 | 5238 |
| 5063 for (src = coding->source; src < src_end; src++) | 5239 for (src = coding->source; src < src_end; src++) |
| 5064 { | 5240 { |
| 5065 c = *src; | 5241 c = *src; |
| 5070 } | 5246 } |
| 5071 coding->head_ascii = src - (coding->source + coding->consumed); | 5247 coding->head_ascii = src - (coding->source + coding->consumed); |
| 5072 | 5248 |
| 5073 if (coding->head_ascii < coding->src_bytes) | 5249 if (coding->head_ascii < coding->src_bytes) |
| 5074 { | 5250 { |
| 5075 int detected = 0; | 5251 struct coding_detection_info detect_info; |
| 5076 | 5252 enum coding_category category; |
| 5253 struct coding_system *this; | |
| 5254 | |
| 5255 detect_info.checked = detect_info.found = detect_info.rejected = 0; | |
| 5077 for (i = 0; i < coding_category_raw_text; i++) | 5256 for (i = 0; i < coding_category_raw_text; i++) |
| 5078 { | 5257 { |
| 5079 enum coding_category category = coding_priorities[i]; | 5258 category = coding_priorities[i]; |
| 5080 struct coding_system *this = coding_categories + category; | 5259 this = coding_categories + category; |
| 5081 | |
| 5082 if (this->id < 0) | 5260 if (this->id < 0) |
| 5083 { | 5261 { |
| 5084 /* No coding system of this category is defined. */ | 5262 /* No coding system of this category is defined. */ |
| 5085 mask &= ~(1 << category); | 5263 detect_info.rejected |= (1 << category); |
| 5086 } | 5264 } |
| 5087 else if (category >= coding_category_raw_text | 5265 else if (category >= coding_category_raw_text) |
| 5088 || detected & (1 << category)) | |
| 5089 continue; | 5266 continue; |
| 5090 else | 5267 else if (detect_info.checked & (1 << category)) |
| 5091 { | 5268 { |
| 5092 detected |= detected_mask[category]; | 5269 if (detect_info.found & (1 << category)) |
| 5093 if ((*(this->detector)) (coding, &mask) | 5270 break; |
| 5094 && (mask & (1 << category))) | |
| 5095 { | |
| 5096 mask = 1 << category; | |
| 5097 break; | |
| 5098 } | |
| 5099 } | 5271 } |
| 5272 else if ((*(this->detector)) (coding, &detect_info) | |
| 5273 && detect_info.found & (1 << category)) | |
| 5274 break; | |
| 5100 } | 5275 } |
| 5101 if (! mask) | 5276 if (i < coding_category_raw_text) |
| 5277 setup_coding_system (CODING_ID_NAME (this->id), coding); | |
| 5278 else if (detect_info.rejected == CATEGORY_MASK_ANY) | |
| 5102 setup_coding_system (Qraw_text, coding); | 5279 setup_coding_system (Qraw_text, coding); |
| 5103 else if (mask != CATEGORY_MASK_ANY) | 5280 else if (detect_info.rejected) |
| 5104 for (i = 0; i < coding_category_raw_text; i++) | 5281 for (i = 0; i < coding_category_raw_text; i++) |
| 5105 { | 5282 if (! (detect_info.rejected & (1 << coding_priorities[i]))) |
| 5106 enum coding_category category = coding_priorities[i]; | 5283 { |
| 5107 struct coding_system *this = coding_categories + category; | 5284 this = coding_categories + coding_priorities[i]; |
| 5108 | 5285 setup_coding_system (CODING_ID_NAME (this->id), coding); |
| 5109 if (mask & (1 << category)) | 5286 break; |
| 5110 { | 5287 } |
| 5111 setup_coding_system (CODING_ID_NAME (this->id), coding); | |
| 5112 break; | |
| 5113 } | |
| 5114 } | |
| 5115 } | 5288 } |
| 5116 } | 5289 } |
| 5117 | 5290 |
| 5118 attrs = CODING_ID_ATTRS (coding->id); | 5291 attrs = CODING_ID_ATTRS (coding->id); |
| 5119 coding_type = CODING_ATTR_TYPE (attrs); | 5292 coding_type = CODING_ATTR_TYPE (attrs); |
| 5406 coding->produced += produced; | 5579 coding->produced += produced; |
| 5407 coding->produced_char += produced_chars; | 5580 coding->produced_char += produced_chars; |
| 5408 return produced_chars; | 5581 return produced_chars; |
| 5409 } | 5582 } |
| 5410 | 5583 |
| 5411 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ] | 5584 /* Compose text in CODING->object according to the annotation data at |
| 5412 or | 5585 CHARBUF. CHARBUF is an array: |
| 5413 [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ] | 5586 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ] |
| 5414 */ | 5587 */ |
| 5415 | 5588 |
| 5416 static INLINE void | 5589 static INLINE void |
| 5417 produce_composition (coding, charbuf) | 5590 produce_composition (coding, charbuf) |
| 5418 struct coding_system *coding; | 5591 struct coding_system *coding; |
| 5419 int *charbuf; | 5592 int *charbuf; |
| 5420 { | 5593 { |
| 5421 Lisp_Object buffer; | |
| 5422 int len; | 5594 int len; |
| 5423 EMACS_INT pos; | 5595 EMACS_INT from, to; |
| 5424 enum composition_method method; | 5596 enum composition_method method; |
| 5425 int cmp_len; | |
| 5426 Lisp_Object components; | 5597 Lisp_Object components; |
| 5427 | 5598 |
| 5428 buffer = coding->dst_object; | |
| 5429 len = -charbuf[0]; | 5599 len = -charbuf[0]; |
| 5430 pos = coding->dst_pos + charbuf[1]; | 5600 from = coding->dst_pos + charbuf[2]; |
| 5431 method = (enum composition_method) (charbuf[3]); | 5601 to = coding->dst_pos + charbuf[3]; |
| 5432 cmp_len = charbuf[4]; | 5602 method = (enum composition_method) (charbuf[4]); |
| 5433 | 5603 |
| 5434 if (method == COMPOSITION_RELATIVE) | 5604 if (method == COMPOSITION_RELATIVE) |
| 5435 components = Qnil; | 5605 components = Qnil; |
| 5436 else | 5606 else |
| 5437 { | 5607 { |
| 5443 for (i = 0; i < len; i++) | 5613 for (i = 0; i < len; i++) |
| 5444 args[i] = make_number (charbuf[i]); | 5614 args[i] = make_number (charbuf[i]); |
| 5445 components = (method == COMPOSITION_WITH_ALTCHARS | 5615 components = (method == COMPOSITION_WITH_ALTCHARS |
| 5446 ? Fstring (len, args) : Fvector (len, args)); | 5616 ? Fstring (len, args) : Fvector (len, args)); |
| 5447 } | 5617 } |
| 5448 compose_text (pos, pos + cmp_len, components, Qnil, Qnil); | 5618 compose_text (from, to, components, Qnil, coding->dst_object); |
| 5449 } | 5619 } |
| 5450 | 5620 |
| 5451 static int * | 5621 |
| 5452 save_composition_data (buf, buf_end, prop) | 5622 /* Put `charset' property on text in CODING->object according to |
| 5453 int *buf, *buf_end; | 5623 the annotation data at CHARBUF. CHARBUF is an array: |
| 5454 Lisp_Object prop; | 5624 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ] |
| 5455 { | 5625 */ |
| 5456 enum composition_method method = COMPOSITION_METHOD (prop); | 5626 |
| 5457 int cmp_len = COMPOSITION_LENGTH (prop); | 5627 static INLINE void |
| 5458 | 5628 produce_charset (coding, charbuf) |
| 5459 if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end) | 5629 struct coding_system *coding; |
| 5460 return NULL; | 5630 int *charbuf; |
| 5461 | 5631 { |
| 5462 buf[1] = CODING_ANNOTATE_COMPOSITION_MASK; | 5632 EMACS_INT from = coding->dst_pos + charbuf[2]; |
| 5463 buf[2] = method; | 5633 EMACS_INT to = coding->dst_pos + charbuf[3]; |
| 5464 buf[3] = cmp_len; | 5634 struct charset *charset = CHARSET_FROM_ID (charbuf[4]); |
| 5465 | 5635 |
| 5466 if (method == COMPOSITION_RELATIVE) | 5636 Fput_text_property (make_number (from), make_number (to), |
| 5467 buf[0] = 4; | 5637 Qcharset, CHARSET_NAME (charset), |
| 5468 else | 5638 coding->dst_object); |
| 5469 { | 5639 } |
| 5470 Lisp_Object components; | 5640 |
| 5471 int len, i; | |
| 5472 | |
| 5473 components = COMPOSITION_COMPONENTS (prop); | |
| 5474 if (VECTORP (components)) | |
| 5475 { | |
| 5476 len = XVECTOR (components)->size; | |
| 5477 for (i = 0; i < len; i++) | |
| 5478 buf[4 + i] = XINT (AREF (components, i)); | |
| 5479 } | |
| 5480 else if (STRINGP (components)) | |
| 5481 { | |
| 5482 int i_byte; | |
| 5483 | |
| 5484 len = XSTRING (components)->size; | |
| 5485 i = i_byte = 0; | |
| 5486 while (i < len) | |
| 5487 FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte); | |
| 5488 } | |
| 5489 else if (INTEGERP (components)) | |
| 5490 { | |
| 5491 len = 1; | |
| 5492 buf[4] = XINT (components); | |
| 5493 } | |
| 5494 else if (CONSP (components)) | |
| 5495 { | |
| 5496 for (len = 0; CONSP (components); | |
| 5497 len++, components = XCDR (components)) | |
| 5498 buf[4 + len] = XINT (XCAR (components)); | |
| 5499 } | |
| 5500 else | |
| 5501 abort (); | |
| 5502 buf[0] = 4 + len; | |
| 5503 } | |
| 5504 return (buf + buf[0]); | |
| 5505 } | |
| 5506 | 5641 |
| 5507 #define CHARBUF_SIZE 0x4000 | 5642 #define CHARBUF_SIZE 0x4000 |
| 5508 | 5643 |
| 5509 #define ALLOC_CONVERSION_WORK_AREA(coding) \ | 5644 #define ALLOC_CONVERSION_WORK_AREA(coding) \ |
| 5510 do { \ | 5645 do { \ |
| 5532 struct coding_system *coding; | 5667 struct coding_system *coding; |
| 5533 { | 5668 { |
| 5534 int *charbuf = coding->charbuf; | 5669 int *charbuf = coding->charbuf; |
| 5535 int *charbuf_end = charbuf + coding->charbuf_used; | 5670 int *charbuf_end = charbuf + coding->charbuf_used; |
| 5536 | 5671 |
| 5672 if (NILP (coding->dst_object)) | |
| 5673 return; | |
| 5674 | |
| 5537 while (charbuf < charbuf_end) | 5675 while (charbuf < charbuf_end) |
| 5538 { | 5676 { |
| 5539 if (*charbuf >= 0) | 5677 if (*charbuf >= 0) |
| 5540 charbuf++; | 5678 charbuf++; |
| 5541 else | 5679 else |
| 5542 { | 5680 { |
| 5543 int len = -*charbuf; | 5681 int len = -*charbuf; |
| 5544 switch (charbuf[2]) | 5682 switch (charbuf[1]) |
| 5545 { | 5683 { |
| 5546 case CODING_ANNOTATE_COMPOSITION_MASK: | 5684 case CODING_ANNOTATE_COMPOSITION_MASK: |
| 5547 produce_composition (coding, charbuf); | 5685 produce_composition (coding, charbuf); |
| 5686 break; | |
| 5687 case CODING_ANNOTATE_CHARSET_MASK: | |
| 5688 produce_charset (coding, charbuf); | |
| 5548 break; | 5689 break; |
| 5549 default: | 5690 default: |
| 5550 abort (); | 5691 abort (); |
| 5551 } | 5692 } |
| 5552 charbuf += len; | 5693 charbuf += len; |
| 5667 } | 5808 } |
| 5668 | 5809 |
| 5669 return coding->result; | 5810 return coding->result; |
| 5670 } | 5811 } |
| 5671 | 5812 |
| 5813 | |
| 5814 /* Extract annotation data from a composition starting at POS and | |
| 5815 ending before LIMIT of CODING->src_object (buffer or string), store | |
| 5816 the data in BUF, set *STOP to a starting position of the next | |
| 5817 composition (if any) or to LIMIT, and return the address of the | |
| 5818 next element of BUF. | |
| 5819 | |
| 5820 If such an annotation is not found, set *STOP to a starting | |
| 5821 position of a composition after POS (if any) or to LIMIT, and | |
| 5822 return BUF. */ | |
| 5823 | |
| 5824 static INLINE int * | |
| 5825 handle_composition_annotation (pos, limit, coding, buf, stop) | |
| 5826 EMACS_INT pos, limit; | |
| 5827 struct coding_system *coding; | |
| 5828 int *buf; | |
| 5829 EMACS_INT *stop; | |
| 5830 { | |
| 5831 EMACS_INT start, end; | |
| 5832 Lisp_Object prop; | |
| 5833 | |
| 5834 if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object) | |
| 5835 || end > limit) | |
| 5836 *stop = limit; | |
| 5837 else if (start > pos) | |
| 5838 *stop = start; | |
| 5839 else | |
| 5840 { | |
| 5841 if (start == pos) | |
| 5842 { | |
| 5843 /* We found a composition. Store the corresponding | |
| 5844 annotation data in BUF. */ | |
| 5845 int *head = buf; | |
| 5846 enum composition_method method = COMPOSITION_METHOD (prop); | |
| 5847 int nchars = COMPOSITION_LENGTH (prop); | |
| 5848 | |
| 5849 ADD_COMPOSITION_DATA (buf, 0, nchars, method); | |
| 5850 if (method != COMPOSITION_RELATIVE) | |
| 5851 { | |
| 5852 Lisp_Object components; | |
| 5853 int len, i, i_byte; | |
| 5854 | |
| 5855 components = COMPOSITION_COMPONENTS (prop); | |
| 5856 if (VECTORP (components)) | |
| 5857 { | |
| 5858 len = XVECTOR (components)->size; | |
| 5859 for (i = 0; i < len; i++) | |
| 5860 *buf++ = XINT (AREF (components, i)); | |
| 5861 } | |
| 5862 else if (STRINGP (components)) | |
| 5863 { | |
| 5864 len = XSTRING (components)->size; | |
| 5865 i = i_byte = 0; | |
| 5866 while (i < len) | |
| 5867 { | |
| 5868 FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte); | |
| 5869 buf++; | |
| 5870 } | |
| 5871 } | |
| 5872 else if (INTEGERP (components)) | |
| 5873 { | |
| 5874 len = 1; | |
| 5875 *buf++ = XINT (components); | |
| 5876 } | |
| 5877 else if (CONSP (components)) | |
| 5878 { | |
| 5879 for (len = 0; CONSP (components); | |
| 5880 len++, components = XCDR (components)) | |
| 5881 *buf++ = XINT (XCAR (components)); | |
| 5882 } | |
| 5883 else | |
| 5884 abort (); | |
| 5885 *head -= len; | |
| 5886 } | |
| 5887 } | |
| 5888 | |
| 5889 if (find_composition (end, limit, &start, &end, &prop, | |
| 5890 coding->src_object) | |
| 5891 && end <= limit) | |
| 5892 *stop = start; | |
| 5893 else | |
| 5894 *stop = limit; | |
| 5895 } | |
| 5896 return buf; | |
| 5897 } | |
| 5898 | |
| 5899 | |
| 5900 /* Extract annotation data from a text property `charset' at POS of | |
| 5901 CODING->src_object (buffer or string), store the data in BUF, set | |
| 5902 *STOP to the position where the value of `charset' property changes | |
| 5903 (limiting by LIMIT), and return the address of the next element of | |
| 5904 BUF. | |
| 5905 | |
| 5906 If the property value is nil, set *STOP to the position where the | |
| 5907 property value is non-nil (limiting by LIMIT), and return BUF. */ | |
| 5908 | |
| 5909 static INLINE int * | |
| 5910 handle_charset_annotation (pos, limit, coding, buf, stop) | |
| 5911 EMACS_INT pos, limit; | |
| 5912 struct coding_system *coding; | |
| 5913 int *buf; | |
| 5914 EMACS_INT *stop; | |
| 5915 { | |
| 5916 Lisp_Object val, next; | |
| 5917 int id; | |
| 5918 | |
| 5919 val = Fget_text_property (make_number (pos), Qcharset, coding->src_object); | |
| 5920 if (! NILP (val) && CHARSETP (val)) | |
| 5921 id = XINT (CHARSET_SYMBOL_ID (val)); | |
| 5922 else | |
| 5923 id = -1; | |
| 5924 ADD_CHARSET_DATA (buf, 0, 0, id); | |
| 5925 next = Fnext_single_property_change (make_number (pos), Qcharset, | |
| 5926 coding->src_object, | |
| 5927 make_number (limit)); | |
| 5928 *stop = XINT (next); | |
| 5929 return buf; | |
| 5930 } | |
| 5931 | |
| 5932 | |
| 5672 static void | 5933 static void |
| 5673 consume_chars (coding) | 5934 consume_chars (coding) |
| 5674 struct coding_system *coding; | 5935 struct coding_system *coding; |
| 5675 { | 5936 { |
| 5676 int *buf = coding->charbuf; | 5937 int *buf = coding->charbuf; |
| 5677 /* -1 is to compensate for CRLF. */ | 5938 int *buf_end = coding->charbuf + coding->charbuf_size; |
| 5678 int *buf_end = coding->charbuf + coding->charbuf_size - 1; | |
| 5679 const unsigned char *src = coding->source + coding->consumed; | 5939 const unsigned char *src = coding->source + coding->consumed; |
| 5680 int pos = coding->src_pos + coding->consumed_char; | 5940 EMACS_INT pos = coding->src_pos + coding->consumed_char; |
| 5681 int end_pos = coding->src_pos + coding->src_chars; | 5941 EMACS_INT end_pos = coding->src_pos + coding->src_chars; |
| 5682 int multibytep = coding->src_multibyte; | 5942 int multibytep = coding->src_multibyte; |
| 5683 Lisp_Object eol_type; | 5943 Lisp_Object eol_type; |
| 5684 int c; | 5944 int c; |
| 5685 int start, end, stop; | 5945 EMACS_INT stop, stop_composition, stop_charset; |
| 5686 Lisp_Object object, prop; | 5946 int id; |
| 5687 | 5947 |
| 5688 eol_type = CODING_ID_EOL_TYPE (coding->id); | 5948 eol_type = CODING_ID_EOL_TYPE (coding->id); |
| 5689 if (VECTORP (eol_type)) | 5949 if (VECTORP (eol_type)) |
| 5690 eol_type = Qunix; | 5950 eol_type = Qunix; |
| 5691 | 5951 |
| 5692 object = coding->src_object; | |
| 5693 | |
| 5694 /* Note: composition handling is not yet implemented. */ | 5952 /* Note: composition handling is not yet implemented. */ |
| 5695 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; | 5953 coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; |
| 5696 | 5954 |
| 5697 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK | 5955 if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK) |
| 5698 && find_composition (pos, end_pos, &start, &end, &prop, object) | 5956 stop = stop_composition = pos; |
| 5699 && end <= end_pos | |
| 5700 && (start >= pos | |
| 5701 || (find_composition (end, end_pos, &start, &end, &prop, object) | |
| 5702 && end <= end_pos))) | |
| 5703 stop = start; | |
| 5704 else | 5957 else |
| 5705 stop = end_pos; | 5958 stop = stop_composition = end_pos; |
| 5706 | 5959 if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK) |
| 5960 stop = stop_charset = pos; | |
| 5961 else | |
| 5962 stop_charset = end_pos; | |
| 5963 | |
| 5964 /* Compensate for CRLF and annotation. */ | |
| 5965 buf_end -= 1 + MAX_ANNOTATION_LENGTH; | |
| 5707 while (buf < buf_end) | 5966 while (buf < buf_end) |
| 5708 { | 5967 { |
| 5709 if (pos == stop) | 5968 if (pos == stop) |
| 5710 { | 5969 { |
| 5711 int *p; | 5970 int *p; |
| 5712 | 5971 |
| 5713 if (pos == end_pos) | 5972 if (pos == end_pos) |
| 5714 break; | 5973 break; |
| 5715 p = save_composition_data (buf, buf_end, prop); | 5974 if (pos == stop_composition) |
| 5716 if (p == NULL) | 5975 buf = handle_composition_annotation (pos, end_pos, coding, |
| 5717 break; | 5976 buf, &stop_composition); |
| 5718 buf = p; | 5977 if (pos == stop_charset) |
| 5719 if (find_composition (end, end_pos, &start, &end, &prop, object) | 5978 buf = handle_charset_annotation (pos, end_pos, coding, |
| 5720 && end <= end_pos) | 5979 buf, &stop_charset); |
| 5721 stop = start; | 5980 stop = (stop_composition < stop_charset |
| 5722 else | 5981 ? stop_composition : stop_charset); |
| 5723 stop = end_pos; | |
| 5724 } | 5982 } |
| 5725 | 5983 |
| 5726 if (! multibytep) | 5984 if (! multibytep) |
| 5727 c = *src++; | 5985 c = *src++; |
| 5728 else | 5986 else |
| 6160 coding->src_pos_byte = from_byte; | 6418 coding->src_pos_byte = from_byte; |
| 6161 } | 6419 } |
| 6162 else if (BUFFERP (src_object)) | 6420 else if (BUFFERP (src_object)) |
| 6163 { | 6421 { |
| 6164 set_buffer_internal (XBUFFER (src_object)); | 6422 set_buffer_internal (XBUFFER (src_object)); |
| 6165 if (from != GPT) | |
| 6166 move_gap_both (from, from_byte); | |
| 6167 if (EQ (src_object, dst_object)) | 6423 if (EQ (src_object, dst_object)) |
| 6168 { | 6424 { |
| 6169 del_range_both (from, from_byte, to, to_byte, 1); | 6425 coding->src_object = del_range_1 (from, to, 1, 1); |
| 6170 coding->src_pos = -chars; | 6426 coding->src_pos = 0; |
| 6171 coding->src_pos_byte = -bytes; | 6427 coding->src_pos_byte = 0; |
| 6172 } | 6428 } |
| 6173 else | 6429 else |
| 6174 { | 6430 { |
| 6431 if (from < GPT && to >= GPT) | |
| 6432 move_gap_both (from, from_byte); | |
| 6175 coding->src_pos = from; | 6433 coding->src_pos = from; |
| 6176 coding->src_pos_byte = from_byte; | 6434 coding->src_pos_byte = from_byte; |
| 6177 } | 6435 } |
| 6178 } | 6436 } |
| 6179 | 6437 |
| 6318 int multibytep; | 6576 int multibytep; |
| 6319 Lisp_Object coding_system; | 6577 Lisp_Object coding_system; |
| 6320 { | 6578 { |
| 6321 unsigned char *src_end = src + src_bytes; | 6579 unsigned char *src_end = src + src_bytes; |
| 6322 int mask = CATEGORY_MASK_ANY; | 6580 int mask = CATEGORY_MASK_ANY; |
| 6323 int detected = 0; | |
| 6324 int c, i; | |
| 6325 Lisp_Object attrs, eol_type; | 6581 Lisp_Object attrs, eol_type; |
| 6326 Lisp_Object val; | 6582 Lisp_Object val; |
| 6327 struct coding_system coding; | 6583 struct coding_system coding; |
| 6328 int id; | 6584 int id; |
| 6585 struct coding_detection_info detect_info; | |
| 6329 | 6586 |
| 6330 if (NILP (coding_system)) | 6587 if (NILP (coding_system)) |
| 6331 coding_system = Qundecided; | 6588 coding_system = Qundecided; |
| 6332 setup_coding_system (coding_system, &coding); | 6589 setup_coding_system (coding_system, &coding); |
| 6333 attrs = CODING_ID_ATTRS (coding.id); | 6590 attrs = CODING_ID_ATTRS (coding.id); |
| 6338 coding.src_bytes = src_bytes; | 6595 coding.src_bytes = src_bytes; |
| 6339 coding.src_multibyte = multibytep; | 6596 coding.src_multibyte = multibytep; |
| 6340 coding.consumed = 0; | 6597 coding.consumed = 0; |
| 6341 coding.mode |= CODING_MODE_LAST_BLOCK; | 6598 coding.mode |= CODING_MODE_LAST_BLOCK; |
| 6342 | 6599 |
| 6600 detect_info.checked = detect_info.found = detect_info.rejected = 0; | |
| 6601 | |
| 6343 /* At first, detect text-format if necessary. */ | 6602 /* At first, detect text-format if necessary. */ |
| 6344 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided) | 6603 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided) |
| 6345 { | 6604 { |
| 6605 enum coding_category category; | |
| 6606 struct coding_system *this; | |
| 6607 int c, i; | |
| 6608 | |
| 6346 for (; src < src_end; src++) | 6609 for (; src < src_end; src++) |
| 6347 { | 6610 { |
| 6348 c = *src; | 6611 c = *src; |
| 6349 if (c & 0x80 | 6612 if (c & 0x80 |
| 6350 || (c < 0x20 && (c == ISO_CODE_ESC | 6613 || (c < 0x20 && (c == ISO_CODE_ESC |
| 6355 coding.head_ascii = src - coding.source; | 6618 coding.head_ascii = src - coding.source; |
| 6356 | 6619 |
| 6357 if (src < src_end) | 6620 if (src < src_end) |
| 6358 for (i = 0; i < coding_category_raw_text; i++) | 6621 for (i = 0; i < coding_category_raw_text; i++) |
| 6359 { | 6622 { |
| 6360 enum coding_category category = coding_priorities[i]; | 6623 category = coding_priorities[i]; |
| 6361 struct coding_system *this = coding_categories + category; | 6624 this = coding_categories + category; |
| 6362 | 6625 |
| 6363 if (this->id < 0) | 6626 if (this->id < 0) |
| 6364 { | 6627 { |
| 6365 /* No coding system of this category is defined. */ | 6628 /* No coding system of this category is defined. */ |
| 6366 mask &= ~(1 << category); | 6629 detect_info.rejected |= (1 << category); |
| 6367 } | 6630 } |
| 6368 else if (category >= coding_category_raw_text | 6631 else if (category >= coding_category_raw_text) |
| 6369 || detected & (1 << category)) | |
| 6370 continue; | 6632 continue; |
| 6633 else if (detect_info.checked & (1 << category)) | |
| 6634 { | |
| 6635 if (highest | |
| 6636 && (detect_info.found & (1 << category))) | |
| 6637 break; | |
| 6638 } | |
| 6371 else | 6639 else |
| 6372 { | 6640 { |
| 6373 detected |= detected_mask[category]; | 6641 if ((*(this->detector)) (&coding, &detect_info) |
| 6374 if ((*(coding_categories[category].detector)) (&coding, &mask) | |
| 6375 && highest | 6642 && highest |
| 6376 && (mask & (1 << category))) | 6643 && (detect_info.found & (1 << category))) |
| 6377 { | 6644 break; |
| 6378 mask = 1 << category; | |
| 6379 break; | |
| 6380 } | |
| 6381 } | 6645 } |
| 6382 } | 6646 } |
| 6383 | 6647 |
| 6384 if (!mask) | 6648 |
| 6385 { | 6649 if (detect_info.rejected == CATEGORY_MASK_ANY) |
| 6650 { | |
| 6651 detect_info.found = CATEGORY_MASK_RAW_TEXT; | |
| 6386 id = coding_categories[coding_category_raw_text].id; | 6652 id = coding_categories[coding_category_raw_text].id; |
| 6387 val = Fcons (make_number (id), Qnil); | 6653 val = Fcons (make_number (id), Qnil); |
| 6388 } | 6654 } |
| 6389 else if (mask == CATEGORY_MASK_ANY) | 6655 else if (! detect_info.rejected && ! detect_info.found) |
| 6390 { | 6656 { |
| 6657 detect_info.found = CATEGORY_MASK_ANY; | |
| 6391 id = coding_categories[coding_category_undecided].id; | 6658 id = coding_categories[coding_category_undecided].id; |
| 6392 val = Fcons (make_number (id), Qnil); | 6659 val = Fcons (make_number (id), Qnil); |
| 6393 } | 6660 } |
| 6394 else if (highest) | 6661 else if (highest) |
| 6395 { | 6662 { |
| 6396 for (i = 0; i < coding_category_raw_text; i++) | 6663 if (detect_info.found) |
| 6397 if (mask & (1 << coding_priorities[i])) | 6664 { |
| 6398 { | 6665 detect_info.found = 1 << category; |
| 6399 id = coding_categories[coding_priorities[i]].id; | 6666 val = Fcons (make_number (this->id), Qnil); |
| 6400 val = Fcons (make_number (id), Qnil); | 6667 } |
| 6401 break; | 6668 else |
| 6402 } | 6669 for (i = 0; i < coding_category_raw_text; i++) |
| 6403 } | 6670 if (! (detect_info.rejected & (1 << coding_priorities[i]))) |
| 6671 { | |
| 6672 detect_info.found = 1 << coding_priorities[i]; | |
| 6673 id = coding_categories[coding_priorities[i]].id; | |
| 6674 val = Fcons (make_number (id), Qnil); | |
| 6675 break; | |
| 6676 } | |
| 6677 } | |
| 6404 else | 6678 else |
| 6405 { | 6679 { |
| 6680 int mask = detect_info.rejected | detect_info.found; | |
| 6681 int found = 0; | |
| 6406 val = Qnil; | 6682 val = Qnil; |
| 6683 | |
| 6407 for (i = coding_category_raw_text - 1; i >= 0; i--) | 6684 for (i = coding_category_raw_text - 1; i >= 0; i--) |
| 6408 if (mask & (1 << coding_priorities[i])) | 6685 { |
| 6409 { | 6686 category = coding_priorities[i]; |
| 6410 id = coding_categories[coding_priorities[i]].id; | 6687 if (! (mask & (1 << category))) |
| 6411 val = Fcons (make_number (id), val); | 6688 { |
| 6412 } | 6689 found |= 1 << category; |
| 6690 id = coding_categories[category].id; | |
| 6691 val = Fcons (make_number (id), val); | |
| 6692 } | |
| 6693 } | |
| 6694 for (i = coding_category_raw_text - 1; i >= 0; i--) | |
| 6695 { | |
| 6696 category = coding_priorities[i]; | |
| 6697 if (detect_info.found & (1 << category)) | |
| 6698 { | |
| 6699 id = coding_categories[category].id; | |
| 6700 val = Fcons (make_number (id), val); | |
| 6701 } | |
| 6702 } | |
| 6703 detect_info.found |= found; | |
| 6413 } | 6704 } |
| 6414 } | 6705 } |
| 6415 else | 6706 else |
| 6416 { | 6707 { |
| 6417 mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); | 6708 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); |
| 6418 val = Fcons (make_number (coding.id), Qnil); | 6709 val = Fcons (make_number (coding.id), Qnil); |
| 6419 } | 6710 } |
| 6420 | 6711 |
| 6421 /* Then, detect eol-format if necessary. */ | 6712 /* Then, detect eol-format if necessary. */ |
| 6422 { | 6713 { |
| 6423 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol; | 6714 int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol; |
| 6424 Lisp_Object tail; | 6715 Lisp_Object tail; |
| 6425 | 6716 |
| 6426 if (VECTORP (eol_type)) | 6717 if (VECTORP (eol_type)) |
| 6427 { | 6718 { |
| 6428 if (mask & ~CATEGORY_MASK_UTF_16) | 6719 if (detect_info.found & ~CATEGORY_MASK_UTF_16) |
| 6429 normal_eol = detect_eol (coding.source, src_bytes, | 6720 normal_eol = detect_eol (coding.source, src_bytes, |
| 6430 coding_category_raw_text); | 6721 coding_category_raw_text); |
| 6431 if (mask & (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_BE_NOSIG)) | 6722 if (detect_info.found & (CATEGORY_MASK_UTF_16_BE |
| 6723 | CATEGORY_MASK_UTF_16_BE_NOSIG)) | |
| 6432 utf_16_be_eol = detect_eol (coding.source, src_bytes, | 6724 utf_16_be_eol = detect_eol (coding.source, src_bytes, |
| 6433 coding_category_utf_16_be); | 6725 coding_category_utf_16_be); |
| 6434 if (mask & (CATEGORY_MASK_UTF_16_LE | CATEGORY_MASK_UTF_16_LE_NOSIG)) | 6726 if (detect_info.found & (CATEGORY_MASK_UTF_16_LE |
| 6727 | CATEGORY_MASK_UTF_16_LE_NOSIG)) | |
| 6435 utf_16_le_eol = detect_eol (coding.source, src_bytes, | 6728 utf_16_le_eol = detect_eol (coding.source, src_bytes, |
| 6436 coding_category_utf_16_le); | 6729 coding_category_utf_16_le); |
| 6437 } | 6730 } |
| 6438 else | 6731 else |
| 6439 { | 6732 { |
