Mercurial > emacs
comparison src/coding.c @ 20150:402b6e5f4b58
(encode_designation_at_bol): Fix bug of finding graphic
registers which should be designated at bol.
(Qsafe_charset): New variable.
(syms_of_coding): Initialize and staticpro it.
(detect_coding_iso2022): Handle SS2 and SS3 correctly.
(DECODE_ISO_CHARACTER): Recover from incorrect encoding in less
dangerous way.
(ENCODE_DESIGNATION): Get charset revision number by
CODING_SPEC_ISO_REVISION_NUMBER.
(setup_coding_system): Initialize the member safe_charsets from
the coding systems's safe-charsets property. Initialize the
member charset_revision_number of struct iso2022_spec.
(ENCODE_ISO_CHARACTER_DIMENSION1): Adjusted for the change of
`safe_charsets' member.
(ENCODE_ISO_CHARACTER_DIMENSION2): Likewise.
(code_convert_region): Restore the current point after calling a
function in coding->post_read_conversion.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Thu, 23 Oct 1997 12:01:50 +0000 |
| parents | c017642863c2 |
| children | 71008f909642 |
comparison
equal
deleted
inserted
replaced
| 20149:b804a783ceec | 20150:402b6e5f4b58 |
|---|---|
| 251 Lisp_Object Qcoding_system, Qeol_type; | 251 Lisp_Object Qcoding_system, Qeol_type; |
| 252 Lisp_Object Qbuffer_file_coding_system; | 252 Lisp_Object Qbuffer_file_coding_system; |
| 253 Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 253 Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 254 Lisp_Object Qno_conversion, Qundecided; | 254 Lisp_Object Qno_conversion, Qundecided; |
| 255 Lisp_Object Qcoding_system_history; | 255 Lisp_Object Qcoding_system_history; |
| 256 Lisp_Object Qsafe_charsets; | |
| 256 | 257 |
| 257 extern Lisp_Object Qinsert_file_contents, Qwrite_region; | 258 extern Lisp_Object Qinsert_file_contents, Qwrite_region; |
| 258 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; | 259 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; |
| 259 Lisp_Object Qstart_process, Qopen_network_stream; | 260 Lisp_Object Qstart_process, Qopen_network_stream; |
| 260 Lisp_Object Qtarget_idx; | 261 Lisp_Object Qtarget_idx; |
| 703 case ISO_CODE_SS2: | 704 case ISO_CODE_SS2: |
| 704 case ISO_CODE_SS3: | 705 case ISO_CODE_SS3: |
| 705 { | 706 { |
| 706 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; | 707 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; |
| 707 | 708 |
| 709 if (c != ISO_CODE_CSI) | |
| 710 { | |
| 711 if (coding_iso_8_1.flags & CODING_FLAG_ISO_SINGLE_SHIFT) | |
| 712 newmask |= CODING_CATEGORY_MASK_ISO_8_1; | |
| 713 if (coding_iso_8_2.flags & CODING_FLAG_ISO_SINGLE_SHIFT) | |
| 714 newmask |= CODING_CATEGORY_MASK_ISO_8_2; | |
| 715 } | |
| 708 if (VECTORP (Vlatin_extra_code_table) | 716 if (VECTORP (Vlatin_extra_code_table) |
| 709 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | 717 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) |
| 710 { | 718 { |
| 711 if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA) | 719 if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA) |
| 712 newmask |= CODING_CATEGORY_MASK_ISO_8_1; | 720 newmask |= CODING_CATEGORY_MASK_ISO_8_1; |
| 772 coding->composing += 2; \ | 780 coding->composing += 2; \ |
| 773 } \ | 781 } \ |
| 774 if ((charset) >= 0) \ | 782 if ((charset) >= 0) \ |
| 775 { \ | 783 { \ |
| 776 if (CHARSET_DIMENSION (charset) == 2) \ | 784 if (CHARSET_DIMENSION (charset) == 2) \ |
| 777 ONE_MORE_BYTE (c2); \ | 785 { \ |
| 786 ONE_MORE_BYTE (c2); \ | |
| 787 if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \ | |
| 788 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \ | |
| 789 { \ | |
| 790 src--; \ | |
| 791 c2 = ' '; \ | |
| 792 } \ | |
| 793 } \ | |
| 778 if (!NILP (unification_table) \ | 794 if (!NILP (unification_table) \ |
| 779 && ((c_alt = unify_char (unification_table, \ | 795 && ((c_alt = unify_char (unification_table, \ |
| 780 -1, (charset), c1, c2)) >= 0)) \ | 796 -1, (charset), c1, c2)) >= 0)) \ |
| 781 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ | 797 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ |
| 782 } \ | 798 } \ |
| 1129 #define ENCODE_DESIGNATION(charset, reg, coding) \ | 1145 #define ENCODE_DESIGNATION(charset, reg, coding) \ |
| 1130 do { \ | 1146 do { \ |
| 1131 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \ | 1147 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \ |
| 1132 char *intermediate_char_94 = "()*+"; \ | 1148 char *intermediate_char_94 = "()*+"; \ |
| 1133 char *intermediate_char_96 = ",-./"; \ | 1149 char *intermediate_char_96 = ",-./"; \ |
| 1134 Lisp_Object temp \ | 1150 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \ |
| 1135 = Fassq (make_number (charset), Vcharset_revision_alist); \ | 1151 if (revision < 255) \ |
| 1136 if (! NILP (temp)) \ | 1152 { \ |
| 1137 { \ | |
| 1138 *dst++ = ISO_CODE_ESC; \ | 1153 *dst++ = ISO_CODE_ESC; \ |
| 1139 *dst++ = '&'; \ | 1154 *dst++ = '&'; \ |
| 1140 *dst++ = XINT (XCONS (temp)->cdr) + '@'; \ | 1155 *dst++ = '@' + revision; \ |
| 1141 } \ | 1156 } \ |
| 1142 *dst++ = ISO_CODE_ESC; \ | 1157 *dst++ = ISO_CODE_ESC; \ |
| 1143 if (CHARSET_DIMENSION (charset) == 1) \ | 1158 if (CHARSET_DIMENSION (charset) == 1) \ |
| 1144 { \ | 1159 { \ |
| 1145 if (CHARSET_CHARS (charset) == 94) \ | 1160 if (CHARSET_CHARS (charset) == 94) \ |
| 1239 { \ | 1254 { \ |
| 1240 *dst++ = c1 | 0x80; \ | 1255 *dst++ = c1 | 0x80; \ |
| 1241 break; \ | 1256 break; \ |
| 1242 } \ | 1257 } \ |
| 1243 else if (coding->flags & CODING_FLAG_ISO_SAFE \ | 1258 else if (coding->flags & CODING_FLAG_ISO_SAFE \ |
| 1244 && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \ | 1259 && !coding->safe_charsets[charset]) \ |
| 1245 { \ | 1260 { \ |
| 1246 /* We should not encode this character, instead produce one or \ | 1261 /* We should not encode this character, instead produce one or \ |
| 1247 two `?'s. */ \ | 1262 two `?'s. */ \ |
| 1248 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | 1263 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ |
| 1249 if (CHARSET_WIDTH (charset) == 2) \ | 1264 if (CHARSET_WIDTH (charset) == 2) \ |
| 1282 { \ | 1297 { \ |
| 1283 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ | 1298 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ |
| 1284 break; \ | 1299 break; \ |
| 1285 } \ | 1300 } \ |
| 1286 else if (coding->flags & CODING_FLAG_ISO_SAFE \ | 1301 else if (coding->flags & CODING_FLAG_ISO_SAFE \ |
| 1287 && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \ | 1302 && !coding->safe_charsets[charset]) \ |
| 1288 { \ | 1303 { \ |
| 1289 /* We should not encode this character, instead produce one or \ | 1304 /* We should not encode this character, instead produce one or \ |
| 1290 two `?'s. */ \ | 1305 two `?'s. */ \ |
| 1291 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | 1306 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ |
| 1292 if (CHARSET_WIDTH (charset) == 2) \ | 1307 if (CHARSET_WIDTH (charset) == 2) \ |
| 1448 if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0) | 1463 if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0) |
| 1449 charset = CHAR_CHARSET (c_alt); | 1464 charset = CHAR_CHARSET (c_alt); |
| 1450 } | 1465 } |
| 1451 | 1466 |
| 1452 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); | 1467 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); |
| 1453 if (r[reg] == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION) | 1468 if (r[reg] < 0) |
| 1454 { | 1469 { |
| 1455 found++; | 1470 found++; |
| 1456 r[reg] = charset; | 1471 r[reg] = charset; |
| 1457 } | 1472 } |
| 1458 | 1473 |
| 2300 Lisp_Object coding_system; | 2315 Lisp_Object coding_system; |
| 2301 struct coding_system *coding; | 2316 struct coding_system *coding; |
| 2302 { | 2317 { |
| 2303 Lisp_Object coding_spec, plist, type, eol_type; | 2318 Lisp_Object coding_spec, plist, type, eol_type; |
| 2304 Lisp_Object val; | 2319 Lisp_Object val; |
| 2320 int i; | |
| 2305 | 2321 |
| 2306 /* At first, set several fields to default values. */ | 2322 /* At first, set several fields to default values. */ |
| 2307 coding->require_flushing = 0; | 2323 coding->require_flushing = 0; |
| 2308 coding->last_block = 0; | 2324 coding->last_block = 0; |
| 2309 coding->selective = 0; | 2325 coding->selective = 0; |
| 2342 if (SYMBOLP (val)) | 2358 if (SYMBOLP (val)) |
| 2343 val = Fget (val, Qcharacter_unification_table_for_encode); | 2359 val = Fget (val, Qcharacter_unification_table_for_encode); |
| 2344 coding->character_unification_table_for_encode | 2360 coding->character_unification_table_for_encode |
| 2345 = CHAR_TABLE_P (val) ? val : Qnil; | 2361 = CHAR_TABLE_P (val) ? val : Qnil; |
| 2346 | 2362 |
| 2363 val = Fplist_get (plist, Qsafe_charsets); | |
| 2364 if (EQ (val, Qt)) | |
| 2365 { | |
| 2366 for (i = 0; i <= MAX_CHARSET; i++) | |
| 2367 coding->safe_charsets[i] = 1; | |
| 2368 } | |
| 2369 else | |
| 2370 { | |
| 2371 bzero (coding->safe_charsets, MAX_CHARSET + 1); | |
| 2372 while (CONSP (val)) | |
| 2373 { | |
| 2374 if ((i = get_charset_id (XCONS (val)->car)) >= 0) | |
| 2375 coding->safe_charsets[i] = 1; | |
| 2376 val = XCONS (val)->cdr; | |
| 2377 } | |
| 2378 } | |
| 2379 | |
| 2347 if (VECTORP (eol_type)) | 2380 if (VECTORP (eol_type)) |
| 2348 coding->eol_type = CODING_EOL_UNDECIDED; | 2381 coding->eol_type = CODING_EOL_UNDECIDED; |
| 2349 else if (XFASTINT (eol_type) == 1) | 2382 else if (XFASTINT (eol_type) == 1) |
| 2350 coding->eol_type = CODING_EOL_CRLF; | 2383 coding->eol_type = CODING_EOL_CRLF; |
| 2351 else if (XFASTINT (eol_type) == 2) | 2384 else if (XFASTINT (eol_type) == 2) |
| 2365 break; | 2398 break; |
| 2366 | 2399 |
| 2367 case 2: | 2400 case 2: |
| 2368 coding->type = coding_type_iso2022; | 2401 coding->type = coding_type_iso2022; |
| 2369 { | 2402 { |
| 2370 Lisp_Object val; | 2403 Lisp_Object val, temp; |
| 2371 Lisp_Object *flags; | 2404 Lisp_Object *flags; |
| 2372 int i, charset, default_reg_bits = 0; | 2405 int i, charset, default_reg_bits = 0; |
| 2373 | 2406 |
| 2374 val = XVECTOR (coding_spec)->contents[4]; | 2407 val = XVECTOR (coding_spec)->contents[4]; |
| 2375 | 2408 |
| 2401 /* Not single shifting at first. */ | 2434 /* Not single shifting at first. */ |
| 2402 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; | 2435 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; |
| 2403 /* Beginning of buffer should also be regarded as bol. */ | 2436 /* Beginning of buffer should also be regarded as bol. */ |
| 2404 CODING_SPEC_ISO_BOL (coding) = 1; | 2437 CODING_SPEC_ISO_BOL (coding) = 1; |
| 2405 | 2438 |
| 2439 for (charset = 0; charset <= MAX_CHARSET; charset++) | |
| 2440 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255; | |
| 2441 val = Vcharset_revision_alist; | |
| 2442 while (CONSP (val)) | |
| 2443 { | |
| 2444 charset = get_charset_id (Fcar_safe (XCONS (val)->car)); | |
| 2445 if (charset >= 0 | |
| 2446 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp)) | |
| 2447 && (i = XINT (temp), (i >= 0 && (i + '@') < 128))) | |
| 2448 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i; | |
| 2449 val = XCONS (val)->cdr; | |
| 2450 } | |
| 2451 | |
| 2406 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. | 2452 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. |
| 2407 FLAGS[REG] can be one of below: | 2453 FLAGS[REG] can be one of below: |
| 2408 integer CHARSET: CHARSET occupies register I, | 2454 integer CHARSET: CHARSET occupies register I, |
| 2409 t: designate nothing to REG initially, but can be used | 2455 t: designate nothing to REG initially, but can be used |
| 2410 by any charsets, | 2456 by any charsets, |
| 2414 if an element is t, REG can be used by any charset, | 2460 if an element is t, REG can be used by any charset, |
| 2415 nil: REG is never used. */ | 2461 nil: REG is never used. */ |
| 2416 for (charset = 0; charset <= MAX_CHARSET; charset++) | 2462 for (charset = 0; charset <= MAX_CHARSET; charset++) |
| 2417 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | 2463 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) |
| 2418 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION; | 2464 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION; |
| 2419 bzero (CODING_SPEC_ISO_EXPECTED_CHARSETS (coding), MAX_CHARSET + 1); | |
| 2420 for (i = 0; i < 4; i++) | 2465 for (i = 0; i < 4; i++) |
| 2421 { | 2466 { |
| 2422 if (INTEGERP (flags[i]) | 2467 if (INTEGERP (flags[i]) |
| 2423 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)) | 2468 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)) |
| 2424 || (charset = get_charset_id (flags[i])) >= 0) | 2469 || (charset = get_charset_id (flags[i])) >= 0) |
| 2425 { | 2470 { |
| 2426 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2471 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2427 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; | 2472 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; |
| 2428 CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1; | |
| 2429 } | 2473 } |
| 2430 else if (EQ (flags[i], Qt)) | 2474 else if (EQ (flags[i], Qt)) |
| 2431 { | 2475 { |
| 2432 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; | 2476 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; |
| 2433 default_reg_bits |= 1 << i; | 2477 default_reg_bits |= 1 << i; |
| 2441 CHARSET_VALID_P (charset)) | 2485 CHARSET_VALID_P (charset)) |
| 2442 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) | 2486 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) |
| 2443 { | 2487 { |
| 2444 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2488 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2445 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; | 2489 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; |
| 2446 CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1; | |
| 2447 } | 2490 } |
| 2448 else | 2491 else |
| 2449 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; | 2492 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; |
| 2450 tail = XCONS (tail)->cdr; | 2493 tail = XCONS (tail)->cdr; |
| 2451 while (CONSP (tail)) | 2494 while (CONSP (tail)) |
| 2452 { | 2495 { |
| 2453 if (INTEGERP (XCONS (tail)->car) | 2496 if (INTEGERP (XCONS (tail)->car) |
| 2454 && (charset = XINT (XCONS (tail)->car), | 2497 && (charset = XINT (XCONS (tail)->car), |
| 2455 CHARSET_VALID_P (charset)) | 2498 CHARSET_VALID_P (charset)) |
| 2456 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) | 2499 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) |
| 2457 { | 2500 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) |
| 2458 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | 2501 = i; |
| 2459 = i; | |
| 2460 CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] | |
| 2461 = 1; | |
| 2462 } | |
| 2463 else if (EQ (XCONS (tail)->car, Qt)) | 2502 else if (EQ (XCONS (tail)->car, Qt)) |
| 2464 default_reg_bits |= 1 << i; | 2503 default_reg_bits |= 1 << i; |
| 2465 tail = XCONS (tail)->cdr; | 2504 tail = XCONS (tail)->cdr; |
| 2466 } | 2505 } |
| 2467 } | 2506 } |
| 3392 | 3431 |
| 3393 beg = XINT (b); | 3432 beg = XINT (b); |
| 3394 TEMP_SET_PT (beg); | 3433 TEMP_SET_PT (beg); |
| 3395 insval = call1 (coding->post_read_conversion, make_number (len)); | 3434 insval = call1 (coding->post_read_conversion, make_number (len)); |
| 3396 CHECK_NUMBER (insval, 0); | 3435 CHECK_NUMBER (insval, 0); |
| 3436 if (pos >= beg + len) | |
| 3437 pos = beg + XINT (insval); | |
| 3438 else if (pos > beg) | |
| 3439 pos = beg; | |
| 3440 TEMP_SET_PT (pos); | |
| 3397 len = XINT (insval); | 3441 len = XINT (insval); |
| 3398 } | 3442 } |
| 3399 | 3443 |
| 3400 return make_number (len); | 3444 return make_number (len); |
| 3401 } | 3445 } |
| 3641 (coding_system) | 3685 (coding_system) |
| 3642 Lisp_Object coding_system; | 3686 Lisp_Object coding_system; |
| 3643 { | 3687 { |
| 3644 CHECK_SYMBOL (coding_system, 0); | 3688 CHECK_SYMBOL (coding_system, 0); |
| 3645 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); | 3689 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); |
| 3646 /* We had better not send unexpected characters to terminal. */ | 3690 /* We had better not send unsafe characters to terminal. */ |
| 3647 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; | 3691 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; |
| 3648 | 3692 |
| 3649 return Qnil; | 3693 return Qnil; |
| 3650 } | 3694 } |
| 3651 | 3695 |
| 3935 | 3979 |
| 3936 Qcharacter_unification_table_for_encode | 3980 Qcharacter_unification_table_for_encode |
| 3937 = intern ("character-unification-table-for-encode"); | 3981 = intern ("character-unification-table-for-encode"); |
| 3938 staticpro (&Qcharacter_unification_table_for_encode); | 3982 staticpro (&Qcharacter_unification_table_for_encode); |
| 3939 | 3983 |
| 3984 Qsafe_charsets = intern ("safe-charsets"); | |
| 3985 staticpro (&Qsafe_charsets); | |
| 3986 | |
| 3940 Qemacs_mule = intern ("emacs-mule"); | 3987 Qemacs_mule = intern ("emacs-mule"); |
| 3941 staticpro (&Qemacs_mule); | 3988 staticpro (&Qemacs_mule); |
| 3942 | 3989 |
| 3943 defsubr (&Scoding_system_p); | 3990 defsubr (&Scoding_system_p); |
| 3944 defsubr (&Sread_coding_system); | 3991 defsubr (&Sread_coding_system); |
