Mercurial > emacs
comparison src/coding.c @ 50185:b64f1af6e142
(Vchar_coding_system_table): Remove this variable.
(Vcoding_system_safe_chars): New variable.
(intersection): Remove this function.
(find_safe_codings): Don't use Vchar_coding_system_table, but try
all codings in SAFE_CODINGS.
(Ffind_coding_systems_region_internal): Adjusted for the change of
find_safe_codings. Get generic coding systems from
Vcoding_system_safe_chars.
(Fdefine_coding_system_internal): New function.
(syms_of_coding): Defsubr Sdefine_coding_system_internal.
Initialize and staticpro Vcoding_system_safe_chars.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Tue, 18 Mar 2003 04:26:15 +0000 |
| parents | 71039837d881 |
| children | 6818550bfbc7 |
comparison
equal
deleted
inserted
replaced
| 50184:fb92861f566e | 50185:b64f1af6e142 |
|---|---|
| 504 pre-write-conversion functions. Usually the value is zero, but it | 504 pre-write-conversion functions. Usually the value is zero, but it |
| 505 is set to 1 temporarily while such functions are running. This is | 505 is set to 1 temporarily while such functions are running. This is |
| 506 to avoid infinite recursive call. */ | 506 to avoid infinite recursive call. */ |
| 507 static int inhibit_pre_post_conversion; | 507 static int inhibit_pre_post_conversion; |
| 508 | 508 |
| 509 /* Char-table containing safe coding systems of each character. */ | |
| 510 Lisp_Object Vchar_coding_system_table; | |
| 511 Lisp_Object Qchar_coding_system; | 509 Lisp_Object Qchar_coding_system; |
| 512 | 510 |
| 513 /* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check | 511 /* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check |
| 514 its validity. */ | 512 its validity. */ |
| 515 | 513 |
| 6386 SBYTES (string) + 1, | 6384 SBYTES (string) + 1, |
| 6387 !NILP (highest), | 6385 !NILP (highest), |
| 6388 STRING_MULTIBYTE (string)); | 6386 STRING_MULTIBYTE (string)); |
| 6389 } | 6387 } |
| 6390 | 6388 |
| 6391 /* Return an intersection of lists L1 and L2. */ | |
| 6392 | |
| 6393 static Lisp_Object | |
| 6394 intersection (l1, l2) | |
| 6395 Lisp_Object l1, l2; | |
| 6396 { | |
| 6397 Lisp_Object val = Fcons (Qnil, Qnil), tail; | |
| 6398 | |
| 6399 for (tail = val; CONSP (l1); l1 = XCDR (l1)) | |
| 6400 { | |
| 6401 if (!NILP (Fmemq (XCAR (l1), l2))) | |
| 6402 { | |
| 6403 XSETCDR (tail, Fcons (XCAR (l1), Qnil)); | |
| 6404 tail = XCDR (tail); | |
| 6405 } | |
| 6406 } | |
| 6407 return XCDR (val); | |
| 6408 } | |
| 6409 | |
| 6410 | |
| 6411 /* Subroutine for Fsafe_coding_systems_region_internal. | 6389 /* Subroutine for Fsafe_coding_systems_region_internal. |
| 6412 | 6390 |
| 6413 Return a list of coding systems that safely encode the multibyte | 6391 Return a list of coding systems that safely encode the multibyte |
| 6414 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of | 6392 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of |
| 6415 possible coding systems. If it is nil, it means that we have not | 6393 possible coding systems. If it is nil, it means that we have not |
| 6421 If a non-ASCII single byte char is found, set | 6399 If a non-ASCII single byte char is found, set |
| 6422 *single_byte_char_found to 1. */ | 6400 *single_byte_char_found to 1. */ |
| 6423 | 6401 |
| 6424 static Lisp_Object | 6402 static Lisp_Object |
| 6425 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) | 6403 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) |
| 6426 unsigned char *p, *pend; | |
| 6427 Lisp_Object safe_codings, work_table; | |
| 6428 int *single_byte_char_found; | |
| 6429 { | |
| 6430 int c, len, idx; | |
| 6431 Lisp_Object val; | |
| 6432 | |
| 6433 while (p < pend) | |
| 6434 { | |
| 6435 c = STRING_CHAR_AND_LENGTH (p, pend - p, len); | |
| 6436 p += len; | |
| 6437 if (ASCII_BYTE_P (c)) | |
| 6438 /* We can ignore ASCII characters here. */ | |
| 6439 continue; | |
| 6440 if (SINGLE_BYTE_CHAR_P (c)) | |
| 6441 *single_byte_char_found = 1; | |
| 6442 if (NILP (safe_codings)) | |
| 6443 continue; | |
| 6444 /* Check the safe coding systems for C. */ | |
| 6445 val = char_table_ref_and_index (work_table, c, &idx); | |
| 6446 if (EQ (val, Qt)) | |
| 6447 /* This element was already checked. Ignore it. */ | |
| 6448 continue; | |
| 6449 /* Remember that we checked this element. */ | |
| 6450 CHAR_TABLE_SET (work_table, make_number (idx), Qt); | |
| 6451 | |
| 6452 /* If there are some safe coding systems for C and we have | |
| 6453 already found the other set of coding systems for the | |
| 6454 different characters, get the intersection of them. */ | |
| 6455 if (!EQ (safe_codings, Qt) && !NILP (val)) | |
| 6456 val = intersection (safe_codings, val); | |
| 6457 safe_codings = val; | |
| 6458 } | |
| 6459 return safe_codings; | |
| 6460 } | |
| 6461 | |
| 6462 | |
| 6463 /* Return a list of coding systems that safely encode the text between | |
| 6464 START and END. If the text contains only ASCII or is unibyte, | |
| 6465 return t. */ | |
| 6466 | |
| 6467 DEFUN ("find-coding-systems-region-internal", | |
| 6468 Ffind_coding_systems_region_internal, | |
| 6469 Sfind_coding_systems_region_internal, 2, 2, 0, | |
| 6470 doc: /* Internal use only. */) | |
| 6471 (start, end) | |
| 6472 Lisp_Object start, end; | |
| 6473 { | |
| 6474 Lisp_Object work_table, safe_codings; | |
| 6475 int non_ascii_p = 0; | |
| 6476 int single_byte_char_found = 0; | |
| 6477 const unsigned char *p1, *p1end, *p2, *p2end, *p; | |
| 6478 | |
| 6479 if (STRINGP (start)) | |
| 6480 { | |
| 6481 if (!STRING_MULTIBYTE (start)) | |
| 6482 return Qt; | |
| 6483 p1 = SDATA (start), p1end = p1 + SBYTES (start); | |
| 6484 p2 = p2end = p1end; | |
| 6485 if (SCHARS (start) != SBYTES (start)) | |
| 6486 non_ascii_p = 1; | |
| 6487 } | |
| 6488 else | |
| 6489 { | |
| 6490 int from, to, stop; | |
| 6491 | |
| 6492 CHECK_NUMBER_COERCE_MARKER (start); | |
| 6493 CHECK_NUMBER_COERCE_MARKER (end); | |
| 6494 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) | |
| 6495 args_out_of_range (start, end); | |
| 6496 if (NILP (current_buffer->enable_multibyte_characters)) | |
| 6497 return Qt; | |
| 6498 from = CHAR_TO_BYTE (XINT (start)); | |
| 6499 to = CHAR_TO_BYTE (XINT (end)); | |
| 6500 stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to; | |
| 6501 p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from); | |
| 6502 if (stop == to) | |
| 6503 p2 = p2end = p1end; | |
| 6504 else | |
| 6505 p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop); | |
| 6506 if (XINT (end) - XINT (start) != to - from) | |
| 6507 non_ascii_p = 1; | |
| 6508 } | |
| 6509 | |
| 6510 if (!non_ascii_p) | |
| 6511 { | |
| 6512 /* We are sure that the text contains no multibyte character. | |
| 6513 Check if it contains eight-bit-graphic. */ | |
| 6514 p = p1; | |
| 6515 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++); | |
| 6516 if (p == p1end) | |
| 6517 { | |
| 6518 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++); | |
| 6519 if (p == p2end) | |
| 6520 return Qt; | |
| 6521 } | |
| 6522 } | |
| 6523 | |
| 6524 /* The text contains non-ASCII characters. */ | |
| 6525 work_table = Fcopy_sequence (Vchar_coding_system_table); | |
| 6526 safe_codings = find_safe_codings (p1, p1end, Qt, work_table, | |
| 6527 &single_byte_char_found); | |
| 6528 if (p2 < p2end) | |
| 6529 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, | |
| 6530 &single_byte_char_found); | |
| 6531 | |
| 6532 if (EQ (safe_codings, Qt)) | |
| 6533 ; /* Nothing to be done. */ | |
| 6534 else if (!single_byte_char_found) | |
| 6535 { | |
| 6536 /* Append generic coding systems. */ | |
| 6537 Lisp_Object args[2]; | |
| 6538 args[0] = safe_codings; | |
| 6539 args[1] = Fchar_table_extra_slot (Vchar_coding_system_table, | |
| 6540 make_number (0)); | |
| 6541 safe_codings = Fappend (2, args); | |
| 6542 } | |
| 6543 else | |
| 6544 safe_codings = Fcons (Qraw_text, | |
| 6545 Fcons (Qemacs_mule, | |
| 6546 Fcons (Qno_conversion, safe_codings))); | |
| 6547 return safe_codings; | |
| 6548 } | |
| 6549 | |
| 6550 | |
| 6551 static Lisp_Object | |
| 6552 find_safe_codings_2 (p, pend, safe_codings, work_table, single_byte_char_found) | |
| 6553 unsigned char *p, *pend; | 6404 unsigned char *p, *pend; |
| 6554 Lisp_Object safe_codings, work_table; | 6405 Lisp_Object safe_codings, work_table; |
| 6555 int *single_byte_char_found; | 6406 int *single_byte_char_found; |
| 6556 { | 6407 { |
| 6557 int c, len, i; | 6408 int c, len, i; |
| 6595 } | 6446 } |
| 6596 } | 6447 } |
| 6597 return safe_codings; | 6448 return safe_codings; |
| 6598 } | 6449 } |
| 6599 | 6450 |
| 6600 DEFUN ("find-coding-systems-region-internal-2", | 6451 DEFUN ("find-coding-systems-region-internal", |
| 6601 Ffind_coding_systems_region_internal_2, | 6452 Ffind_coding_systems_region_internal, |
| 6602 Sfind_coding_systems_region_internal_2, 2, 2, 0, | 6453 Sfind_coding_systems_region_internal, 2, 2, 0, |
| 6603 doc: /* Internal use only. */) | 6454 doc: /* Internal use only. */) |
| 6604 (start, end) | 6455 (start, end) |
| 6605 Lisp_Object start, end; | 6456 Lisp_Object start, end; |
| 6606 { | 6457 { |
| 6607 Lisp_Object work_table, safe_codings; | 6458 Lisp_Object work_table, safe_codings; |
| 6657 /* The text contains non-ASCII characters. */ | 6508 /* The text contains non-ASCII characters. */ |
| 6658 | 6509 |
| 6659 work_table = Fmake_char_table (Qchar_coding_system, Qnil); | 6510 work_table = Fmake_char_table (Qchar_coding_system, Qnil); |
| 6660 safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars)); | 6511 safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars)); |
| 6661 | 6512 |
| 6662 safe_codings = find_safe_codings_2 (p1, p1end, safe_codings, work_table, | 6513 safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table, |
| 6514 &single_byte_char_found); | |
| 6515 if (p2 < p2end) | |
| 6516 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, | |
| 6663 &single_byte_char_found); | 6517 &single_byte_char_found); |
| 6664 if (p2 < p2end) | |
| 6665 safe_codings = find_safe_codings_2 (p2, p2end, safe_codings, work_table, | |
| 6666 &single_byte_char_found); | |
| 6667 if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) | 6518 if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) |
| 6668 safe_codings = Qt; | 6519 safe_codings = Qt; |
| 6669 else | 6520 else |
| 6670 { | 6521 { |
| 6671 /* Turn safe_codings to a list of coding systems... */ | 6522 /* Turn safe_codings to a list of coding systems... */ |
| 7532 /* Intern this now in case it isn't already done. | 7383 /* Intern this now in case it isn't already done. |
| 7533 Setting this variable twice is harmless. | 7384 Setting this variable twice is harmless. |
| 7534 But don't staticpro it here--that is done in alloc.c. */ | 7385 But don't staticpro it here--that is done in alloc.c. */ |
| 7535 Qchar_table_extra_slots = intern ("char-table-extra-slots"); | 7386 Qchar_table_extra_slots = intern ("char-table-extra-slots"); |
| 7536 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); | 7387 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); |
| 7537 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2)); | 7388 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0)); |
| 7538 | 7389 |
| 7539 Qvalid_codes = intern ("valid-codes"); | 7390 Qvalid_codes = intern ("valid-codes"); |
| 7540 staticpro (&Qvalid_codes); | 7391 staticpro (&Qvalid_codes); |
| 7541 | 7392 |
| 7542 Qemacs_mule = intern ("emacs-mule"); | 7393 Qemacs_mule = intern ("emacs-mule"); |
| 7550 defsubr (&Sread_non_nil_coding_system); | 7401 defsubr (&Sread_non_nil_coding_system); |
| 7551 defsubr (&Scheck_coding_system); | 7402 defsubr (&Scheck_coding_system); |
| 7552 defsubr (&Sdetect_coding_region); | 7403 defsubr (&Sdetect_coding_region); |
| 7553 defsubr (&Sdetect_coding_string); | 7404 defsubr (&Sdetect_coding_string); |
| 7554 defsubr (&Sfind_coding_systems_region_internal); | 7405 defsubr (&Sfind_coding_systems_region_internal); |
| 7555 defsubr (&Sfind_coding_systems_region_internal_2); | |
| 7556 defsubr (&Sunencodable_char_position); | 7406 defsubr (&Sunencodable_char_position); |
| 7557 defsubr (&Sdecode_coding_region); | 7407 defsubr (&Sdecode_coding_region); |
| 7558 defsubr (&Sencode_coding_region); | 7408 defsubr (&Sencode_coding_region); |
| 7559 defsubr (&Sdecode_coding_string); | 7409 defsubr (&Sdecode_coding_string); |
| 7560 defsubr (&Sencode_coding_string); | 7410 defsubr (&Sencode_coding_string); |
| 7772 called even if `coding-system-for-write' is non-nil. The command | 7622 called even if `coding-system-for-write' is non-nil. The command |
| 7773 `universal-coding-system-argument' binds this variable to t temporarily. */); | 7623 `universal-coding-system-argument' binds this variable to t temporarily. */); |
| 7774 coding_system_require_warning = 0; | 7624 coding_system_require_warning = 0; |
| 7775 | 7625 |
| 7776 | 7626 |
| 7777 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table, | |
| 7778 doc: /* Char-table containing safe coding systems of each characters. | |
| 7779 Each element doesn't include such generic coding systems that can | |
| 7780 encode any characters. They are in the first extra slot. */); | |
| 7781 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil); | |
| 7782 | |
| 7783 DEFVAR_BOOL ("inhibit-iso-escape-detection", | 7627 DEFVAR_BOOL ("inhibit-iso-escape-detection", |
| 7784 &inhibit_iso_escape_detection, | 7628 &inhibit_iso_escape_detection, |
| 7785 doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection. | 7629 doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection. |
| 7786 | 7630 |
| 7787 By default, on reading a file, Emacs tries to detect how the text is | 7631 By default, on reading a file, Emacs tries to detect how the text is |
