comparison src/coding.c @ 20150:402b6e5f4b58

(encode_designation_at_bol): Fix bug of finding graphic registers which should be designated at bol. (Qsafe_charset): New variable. (syms_of_coding): Initialize and staticpro it. (detect_coding_iso2022): Handle SS2 and SS3 correctly. (DECODE_ISO_CHARACTER): Recover from incorrect encoding in less dangerous way. (ENCODE_DESIGNATION): Get charset revision number by CODING_SPEC_ISO_REVISION_NUMBER. (setup_coding_system): Initialize the member safe_charsets from the coding systems's safe-charsets property. Initialize the member charset_revision_number of struct iso2022_spec. (ENCODE_ISO_CHARACTER_DIMENSION1): Adjusted for the change of `safe_charsets' member. (ENCODE_ISO_CHARACTER_DIMENSION2): Likewise. (code_convert_region): Restore the current point after calling a function in coding->post_read_conversion.
author Kenichi Handa <handa@m17n.org>
date Thu, 23 Oct 1997 12:01:50 +0000
parents c017642863c2
children 71008f909642
comparison
equal deleted inserted replaced
20149:b804a783ceec 20150:402b6e5f4b58
251 Lisp_Object Qcoding_system, Qeol_type; 251 Lisp_Object Qcoding_system, Qeol_type;
252 Lisp_Object Qbuffer_file_coding_system; 252 Lisp_Object Qbuffer_file_coding_system;
253 Lisp_Object Qpost_read_conversion, Qpre_write_conversion; 253 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
254 Lisp_Object Qno_conversion, Qundecided; 254 Lisp_Object Qno_conversion, Qundecided;
255 Lisp_Object Qcoding_system_history; 255 Lisp_Object Qcoding_system_history;
256 Lisp_Object Qsafe_charsets;
256 257
257 extern Lisp_Object Qinsert_file_contents, Qwrite_region; 258 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
258 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; 259 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
259 Lisp_Object Qstart_process, Qopen_network_stream; 260 Lisp_Object Qstart_process, Qopen_network_stream;
260 Lisp_Object Qtarget_idx; 261 Lisp_Object Qtarget_idx;
703 case ISO_CODE_SS2: 704 case ISO_CODE_SS2:
704 case ISO_CODE_SS3: 705 case ISO_CODE_SS3:
705 { 706 {
706 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; 707 int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
707 708
709 if (c != ISO_CODE_CSI)
710 {
711 if (coding_iso_8_1.flags & CODING_FLAG_ISO_SINGLE_SHIFT)
712 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
713 if (coding_iso_8_2.flags & CODING_FLAG_ISO_SINGLE_SHIFT)
714 newmask |= CODING_CATEGORY_MASK_ISO_8_2;
715 }
708 if (VECTORP (Vlatin_extra_code_table) 716 if (VECTORP (Vlatin_extra_code_table)
709 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) 717 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
710 { 718 {
711 if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA) 719 if (coding_iso_8_1.flags & CODING_FLAG_ISO_LATIN_EXTRA)
712 newmask |= CODING_CATEGORY_MASK_ISO_8_1; 720 newmask |= CODING_CATEGORY_MASK_ISO_8_1;
772 coding->composing += 2; \ 780 coding->composing += 2; \
773 } \ 781 } \
774 if ((charset) >= 0) \ 782 if ((charset) >= 0) \
775 { \ 783 { \
776 if (CHARSET_DIMENSION (charset) == 2) \ 784 if (CHARSET_DIMENSION (charset) == 2) \
777 ONE_MORE_BYTE (c2); \ 785 { \
786 ONE_MORE_BYTE (c2); \
787 if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \
788 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \
789 { \
790 src--; \
791 c2 = ' '; \
792 } \
793 } \
778 if (!NILP (unification_table) \ 794 if (!NILP (unification_table) \
779 && ((c_alt = unify_char (unification_table, \ 795 && ((c_alt = unify_char (unification_table, \
780 -1, (charset), c1, c2)) >= 0)) \ 796 -1, (charset), c1, c2)) >= 0)) \
781 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \ 797 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
782 } \ 798 } \
1129 #define ENCODE_DESIGNATION(charset, reg, coding) \ 1145 #define ENCODE_DESIGNATION(charset, reg, coding) \
1130 do { \ 1146 do { \
1131 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \ 1147 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
1132 char *intermediate_char_94 = "()*+"; \ 1148 char *intermediate_char_94 = "()*+"; \
1133 char *intermediate_char_96 = ",-./"; \ 1149 char *intermediate_char_96 = ",-./"; \
1134 Lisp_Object temp \ 1150 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
1135 = Fassq (make_number (charset), Vcharset_revision_alist); \ 1151 if (revision < 255) \
1136 if (! NILP (temp)) \ 1152 { \
1137 { \
1138 *dst++ = ISO_CODE_ESC; \ 1153 *dst++ = ISO_CODE_ESC; \
1139 *dst++ = '&'; \ 1154 *dst++ = '&'; \
1140 *dst++ = XINT (XCONS (temp)->cdr) + '@'; \ 1155 *dst++ = '@' + revision; \
1141 } \ 1156 } \
1142 *dst++ = ISO_CODE_ESC; \ 1157 *dst++ = ISO_CODE_ESC; \
1143 if (CHARSET_DIMENSION (charset) == 1) \ 1158 if (CHARSET_DIMENSION (charset) == 1) \
1144 { \ 1159 { \
1145 if (CHARSET_CHARS (charset) == 94) \ 1160 if (CHARSET_CHARS (charset) == 94) \
1239 { \ 1254 { \
1240 *dst++ = c1 | 0x80; \ 1255 *dst++ = c1 | 0x80; \
1241 break; \ 1256 break; \
1242 } \ 1257 } \
1243 else if (coding->flags & CODING_FLAG_ISO_SAFE \ 1258 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1244 && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \ 1259 && !coding->safe_charsets[charset]) \
1245 { \ 1260 { \
1246 /* We should not encode this character, instead produce one or \ 1261 /* We should not encode this character, instead produce one or \
1247 two `?'s. */ \ 1262 two `?'s. */ \
1248 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ 1263 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1249 if (CHARSET_WIDTH (charset) == 2) \ 1264 if (CHARSET_WIDTH (charset) == 2) \
1282 { \ 1297 { \
1283 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ 1298 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
1284 break; \ 1299 break; \
1285 } \ 1300 } \
1286 else if (coding->flags & CODING_FLAG_ISO_SAFE \ 1301 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1287 && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \ 1302 && !coding->safe_charsets[charset]) \
1288 { \ 1303 { \
1289 /* We should not encode this character, instead produce one or \ 1304 /* We should not encode this character, instead produce one or \
1290 two `?'s. */ \ 1305 two `?'s. */ \
1291 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ 1306 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1292 if (CHARSET_WIDTH (charset) == 2) \ 1307 if (CHARSET_WIDTH (charset) == 2) \
1448 if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0) 1463 if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1449 charset = CHAR_CHARSET (c_alt); 1464 charset = CHAR_CHARSET (c_alt);
1450 } 1465 }
1451 1466
1452 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); 1467 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1453 if (r[reg] == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION) 1468 if (r[reg] < 0)
1454 { 1469 {
1455 found++; 1470 found++;
1456 r[reg] = charset; 1471 r[reg] = charset;
1457 } 1472 }
1458 1473
2300 Lisp_Object coding_system; 2315 Lisp_Object coding_system;
2301 struct coding_system *coding; 2316 struct coding_system *coding;
2302 { 2317 {
2303 Lisp_Object coding_spec, plist, type, eol_type; 2318 Lisp_Object coding_spec, plist, type, eol_type;
2304 Lisp_Object val; 2319 Lisp_Object val;
2320 int i;
2305 2321
2306 /* At first, set several fields to default values. */ 2322 /* At first, set several fields to default values. */
2307 coding->require_flushing = 0; 2323 coding->require_flushing = 0;
2308 coding->last_block = 0; 2324 coding->last_block = 0;
2309 coding->selective = 0; 2325 coding->selective = 0;
2342 if (SYMBOLP (val)) 2358 if (SYMBOLP (val))
2343 val = Fget (val, Qcharacter_unification_table_for_encode); 2359 val = Fget (val, Qcharacter_unification_table_for_encode);
2344 coding->character_unification_table_for_encode 2360 coding->character_unification_table_for_encode
2345 = CHAR_TABLE_P (val) ? val : Qnil; 2361 = CHAR_TABLE_P (val) ? val : Qnil;
2346 2362
2363 val = Fplist_get (plist, Qsafe_charsets);
2364 if (EQ (val, Qt))
2365 {
2366 for (i = 0; i <= MAX_CHARSET; i++)
2367 coding->safe_charsets[i] = 1;
2368 }
2369 else
2370 {
2371 bzero (coding->safe_charsets, MAX_CHARSET + 1);
2372 while (CONSP (val))
2373 {
2374 if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2375 coding->safe_charsets[i] = 1;
2376 val = XCONS (val)->cdr;
2377 }
2378 }
2379
2347 if (VECTORP (eol_type)) 2380 if (VECTORP (eol_type))
2348 coding->eol_type = CODING_EOL_UNDECIDED; 2381 coding->eol_type = CODING_EOL_UNDECIDED;
2349 else if (XFASTINT (eol_type) == 1) 2382 else if (XFASTINT (eol_type) == 1)
2350 coding->eol_type = CODING_EOL_CRLF; 2383 coding->eol_type = CODING_EOL_CRLF;
2351 else if (XFASTINT (eol_type) == 2) 2384 else if (XFASTINT (eol_type) == 2)
2365 break; 2398 break;
2366 2399
2367 case 2: 2400 case 2:
2368 coding->type = coding_type_iso2022; 2401 coding->type = coding_type_iso2022;
2369 { 2402 {
2370 Lisp_Object val; 2403 Lisp_Object val, temp;
2371 Lisp_Object *flags; 2404 Lisp_Object *flags;
2372 int i, charset, default_reg_bits = 0; 2405 int i, charset, default_reg_bits = 0;
2373 2406
2374 val = XVECTOR (coding_spec)->contents[4]; 2407 val = XVECTOR (coding_spec)->contents[4];
2375 2408
2401 /* Not single shifting at first. */ 2434 /* Not single shifting at first. */
2402 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; 2435 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2403 /* Beginning of buffer should also be regarded as bol. */ 2436 /* Beginning of buffer should also be regarded as bol. */
2404 CODING_SPEC_ISO_BOL (coding) = 1; 2437 CODING_SPEC_ISO_BOL (coding) = 1;
2405 2438
2439 for (charset = 0; charset <= MAX_CHARSET; charset++)
2440 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2441 val = Vcharset_revision_alist;
2442 while (CONSP (val))
2443 {
2444 charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2445 if (charset >= 0
2446 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2447 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2448 CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2449 val = XCONS (val)->cdr;
2450 }
2451
2406 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. 2452 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2407 FLAGS[REG] can be one of below: 2453 FLAGS[REG] can be one of below:
2408 integer CHARSET: CHARSET occupies register I, 2454 integer CHARSET: CHARSET occupies register I,
2409 t: designate nothing to REG initially, but can be used 2455 t: designate nothing to REG initially, but can be used
2410 by any charsets, 2456 by any charsets,
2414 if an element is t, REG can be used by any charset, 2460 if an element is t, REG can be used by any charset,
2415 nil: REG is never used. */ 2461 nil: REG is never used. */
2416 for (charset = 0; charset <= MAX_CHARSET; charset++) 2462 for (charset = 0; charset <= MAX_CHARSET; charset++)
2417 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) 2463 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2418 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION; 2464 = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2419 bzero (CODING_SPEC_ISO_EXPECTED_CHARSETS (coding), MAX_CHARSET + 1);
2420 for (i = 0; i < 4; i++) 2465 for (i = 0; i < 4; i++)
2421 { 2466 {
2422 if (INTEGERP (flags[i]) 2467 if (INTEGERP (flags[i])
2423 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)) 2468 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2424 || (charset = get_charset_id (flags[i])) >= 0) 2469 || (charset = get_charset_id (flags[i])) >= 0)
2425 { 2470 {
2426 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; 2471 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2427 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; 2472 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2428 CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1;
2429 } 2473 }
2430 else if (EQ (flags[i], Qt)) 2474 else if (EQ (flags[i], Qt))
2431 { 2475 {
2432 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; 2476 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2433 default_reg_bits |= 1 << i; 2477 default_reg_bits |= 1 << i;
2441 CHARSET_VALID_P (charset)) 2485 CHARSET_VALID_P (charset))
2442 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) 2486 || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2443 { 2487 {
2444 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; 2488 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2445 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; 2489 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2446 CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1;
2447 } 2490 }
2448 else 2491 else
2449 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; 2492 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2450 tail = XCONS (tail)->cdr; 2493 tail = XCONS (tail)->cdr;
2451 while (CONSP (tail)) 2494 while (CONSP (tail))
2452 { 2495 {
2453 if (INTEGERP (XCONS (tail)->car) 2496 if (INTEGERP (XCONS (tail)->car)
2454 && (charset = XINT (XCONS (tail)->car), 2497 && (charset = XINT (XCONS (tail)->car),
2455 CHARSET_VALID_P (charset)) 2498 CHARSET_VALID_P (charset))
2456 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) 2499 || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2457 { 2500 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2458 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) 2501 = i;
2459 = i;
2460 CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]
2461 = 1;
2462 }
2463 else if (EQ (XCONS (tail)->car, Qt)) 2502 else if (EQ (XCONS (tail)->car, Qt))
2464 default_reg_bits |= 1 << i; 2503 default_reg_bits |= 1 << i;
2465 tail = XCONS (tail)->cdr; 2504 tail = XCONS (tail)->cdr;
2466 } 2505 }
2467 } 2506 }
3392 3431
3393 beg = XINT (b); 3432 beg = XINT (b);
3394 TEMP_SET_PT (beg); 3433 TEMP_SET_PT (beg);
3395 insval = call1 (coding->post_read_conversion, make_number (len)); 3434 insval = call1 (coding->post_read_conversion, make_number (len));
3396 CHECK_NUMBER (insval, 0); 3435 CHECK_NUMBER (insval, 0);
3436 if (pos >= beg + len)
3437 pos = beg + XINT (insval);
3438 else if (pos > beg)
3439 pos = beg;
3440 TEMP_SET_PT (pos);
3397 len = XINT (insval); 3441 len = XINT (insval);
3398 } 3442 }
3399 3443
3400 return make_number (len); 3444 return make_number (len);
3401 } 3445 }
3641 (coding_system) 3685 (coding_system)
3642 Lisp_Object coding_system; 3686 Lisp_Object coding_system;
3643 { 3687 {
3644 CHECK_SYMBOL (coding_system, 0); 3688 CHECK_SYMBOL (coding_system, 0);
3645 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); 3689 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
3646 /* We had better not send unexpected characters to terminal. */ 3690 /* We had better not send unsafe characters to terminal. */
3647 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; 3691 terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
3648 3692
3649 return Qnil; 3693 return Qnil;
3650 } 3694 }
3651 3695
3935 3979
3936 Qcharacter_unification_table_for_encode 3980 Qcharacter_unification_table_for_encode
3937 = intern ("character-unification-table-for-encode"); 3981 = intern ("character-unification-table-for-encode");
3938 staticpro (&Qcharacter_unification_table_for_encode); 3982 staticpro (&Qcharacter_unification_table_for_encode);
3939 3983
3984 Qsafe_charsets = intern ("safe-charsets");
3985 staticpro (&Qsafe_charsets);
3986
3940 Qemacs_mule = intern ("emacs-mule"); 3987 Qemacs_mule = intern ("emacs-mule");
3941 staticpro (&Qemacs_mule); 3988 staticpro (&Qemacs_mule);
3942 3989
3943 defsubr (&Scoding_system_p); 3990 defsubr (&Scoding_system_p);
3944 defsubr (&Sread_coding_system); 3991 defsubr (&Sread_coding_system);