comparison src/coding.c @ 50185:b64f1af6e142

(Vchar_coding_system_table): Remove this variable. (Vcoding_system_safe_chars): New variable. (intersection): Remove this function. (find_safe_codings): Don't use Vchar_coding_system_table, but try all codings in SAFE_CODINGS. (Ffind_coding_systems_region_internal): Adjusted for the change of find_safe_codings. Get generic coding systems from Vcoding_system_safe_chars. (Fdefine_coding_system_internal): New function. (syms_of_coding): Defsubr Sdefine_coding_system_internal. Initialize and staticpro Vcoding_system_safe_chars.
author Kenichi Handa <handa@m17n.org>
date Tue, 18 Mar 2003 04:26:15 +0000
parents 71039837d881
children 6818550bfbc7
comparison
equal deleted inserted replaced
50184:fb92861f566e 50185:b64f1af6e142
504 pre-write-conversion functions. Usually the value is zero, but it 504 pre-write-conversion functions. Usually the value is zero, but it
505 is set to 1 temporarily while such functions are running. This is 505 is set to 1 temporarily while such functions are running. This is
506 to avoid infinite recursive call. */ 506 to avoid infinite recursive call. */
507 static int inhibit_pre_post_conversion; 507 static int inhibit_pre_post_conversion;
508 508
509 /* Char-table containing safe coding systems of each character. */
510 Lisp_Object Vchar_coding_system_table;
511 Lisp_Object Qchar_coding_system; 509 Lisp_Object Qchar_coding_system;
512 510
513 /* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check 511 /* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
514 its validity. */ 512 its validity. */
515 513
6386 SBYTES (string) + 1, 6384 SBYTES (string) + 1,
6387 !NILP (highest), 6385 !NILP (highest),
6388 STRING_MULTIBYTE (string)); 6386 STRING_MULTIBYTE (string));
6389 } 6387 }
6390 6388
6391 /* Return an intersection of lists L1 and L2. */
6392
6393 static Lisp_Object
6394 intersection (l1, l2)
6395 Lisp_Object l1, l2;
6396 {
6397 Lisp_Object val = Fcons (Qnil, Qnil), tail;
6398
6399 for (tail = val; CONSP (l1); l1 = XCDR (l1))
6400 {
6401 if (!NILP (Fmemq (XCAR (l1), l2)))
6402 {
6403 XSETCDR (tail, Fcons (XCAR (l1), Qnil));
6404 tail = XCDR (tail);
6405 }
6406 }
6407 return XCDR (val);
6408 }
6409
6410
6411 /* Subroutine for Fsafe_coding_systems_region_internal. 6389 /* Subroutine for Fsafe_coding_systems_region_internal.
6412 6390
6413 Return a list of coding systems that safely encode the multibyte 6391 Return a list of coding systems that safely encode the multibyte
6414 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of 6392 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of
6415 possible coding systems. If it is nil, it means that we have not 6393 possible coding systems. If it is nil, it means that we have not
6421 If a non-ASCII single byte char is found, set 6399 If a non-ASCII single byte char is found, set
6422 *single_byte_char_found to 1. */ 6400 *single_byte_char_found to 1. */
6423 6401
6424 static Lisp_Object 6402 static Lisp_Object
6425 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) 6403 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6426 unsigned char *p, *pend;
6427 Lisp_Object safe_codings, work_table;
6428 int *single_byte_char_found;
6429 {
6430 int c, len, idx;
6431 Lisp_Object val;
6432
6433 while (p < pend)
6434 {
6435 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6436 p += len;
6437 if (ASCII_BYTE_P (c))
6438 /* We can ignore ASCII characters here. */
6439 continue;
6440 if (SINGLE_BYTE_CHAR_P (c))
6441 *single_byte_char_found = 1;
6442 if (NILP (safe_codings))
6443 continue;
6444 /* Check the safe coding systems for C. */
6445 val = char_table_ref_and_index (work_table, c, &idx);
6446 if (EQ (val, Qt))
6447 /* This element was already checked. Ignore it. */
6448 continue;
6449 /* Remember that we checked this element. */
6450 CHAR_TABLE_SET (work_table, make_number (idx), Qt);
6451
6452 /* If there are some safe coding systems for C and we have
6453 already found the other set of coding systems for the
6454 different characters, get the intersection of them. */
6455 if (!EQ (safe_codings, Qt) && !NILP (val))
6456 val = intersection (safe_codings, val);
6457 safe_codings = val;
6458 }
6459 return safe_codings;
6460 }
6461
6462
6463 /* Return a list of coding systems that safely encode the text between
6464 START and END. If the text contains only ASCII or is unibyte,
6465 return t. */
6466
6467 DEFUN ("find-coding-systems-region-internal",
6468 Ffind_coding_systems_region_internal,
6469 Sfind_coding_systems_region_internal, 2, 2, 0,
6470 doc: /* Internal use only. */)
6471 (start, end)
6472 Lisp_Object start, end;
6473 {
6474 Lisp_Object work_table, safe_codings;
6475 int non_ascii_p = 0;
6476 int single_byte_char_found = 0;
6477 const unsigned char *p1, *p1end, *p2, *p2end, *p;
6478
6479 if (STRINGP (start))
6480 {
6481 if (!STRING_MULTIBYTE (start))
6482 return Qt;
6483 p1 = SDATA (start), p1end = p1 + SBYTES (start);
6484 p2 = p2end = p1end;
6485 if (SCHARS (start) != SBYTES (start))
6486 non_ascii_p = 1;
6487 }
6488 else
6489 {
6490 int from, to, stop;
6491
6492 CHECK_NUMBER_COERCE_MARKER (start);
6493 CHECK_NUMBER_COERCE_MARKER (end);
6494 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6495 args_out_of_range (start, end);
6496 if (NILP (current_buffer->enable_multibyte_characters))
6497 return Qt;
6498 from = CHAR_TO_BYTE (XINT (start));
6499 to = CHAR_TO_BYTE (XINT (end));
6500 stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6501 p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6502 if (stop == to)
6503 p2 = p2end = p1end;
6504 else
6505 p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6506 if (XINT (end) - XINT (start) != to - from)
6507 non_ascii_p = 1;
6508 }
6509
6510 if (!non_ascii_p)
6511 {
6512 /* We are sure that the text contains no multibyte character.
6513 Check if it contains eight-bit-graphic. */
6514 p = p1;
6515 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6516 if (p == p1end)
6517 {
6518 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6519 if (p == p2end)
6520 return Qt;
6521 }
6522 }
6523
6524 /* The text contains non-ASCII characters. */
6525 work_table = Fcopy_sequence (Vchar_coding_system_table);
6526 safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
6527 &single_byte_char_found);
6528 if (p2 < p2end)
6529 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6530 &single_byte_char_found);
6531
6532 if (EQ (safe_codings, Qt))
6533 ; /* Nothing to be done. */
6534 else if (!single_byte_char_found)
6535 {
6536 /* Append generic coding systems. */
6537 Lisp_Object args[2];
6538 args[0] = safe_codings;
6539 args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
6540 make_number (0));
6541 safe_codings = Fappend (2, args);
6542 }
6543 else
6544 safe_codings = Fcons (Qraw_text,
6545 Fcons (Qemacs_mule,
6546 Fcons (Qno_conversion, safe_codings)));
6547 return safe_codings;
6548 }
6549
6550
6551 static Lisp_Object
6552 find_safe_codings_2 (p, pend, safe_codings, work_table, single_byte_char_found)
6553 unsigned char *p, *pend; 6404 unsigned char *p, *pend;
6554 Lisp_Object safe_codings, work_table; 6405 Lisp_Object safe_codings, work_table;
6555 int *single_byte_char_found; 6406 int *single_byte_char_found;
6556 { 6407 {
6557 int c, len, i; 6408 int c, len, i;
6595 } 6446 }
6596 } 6447 }
6597 return safe_codings; 6448 return safe_codings;
6598 } 6449 }
6599 6450
6600 DEFUN ("find-coding-systems-region-internal-2", 6451 DEFUN ("find-coding-systems-region-internal",
6601 Ffind_coding_systems_region_internal_2, 6452 Ffind_coding_systems_region_internal,
6602 Sfind_coding_systems_region_internal_2, 2, 2, 0, 6453 Sfind_coding_systems_region_internal, 2, 2, 0,
6603 doc: /* Internal use only. */) 6454 doc: /* Internal use only. */)
6604 (start, end) 6455 (start, end)
6605 Lisp_Object start, end; 6456 Lisp_Object start, end;
6606 { 6457 {
6607 Lisp_Object work_table, safe_codings; 6458 Lisp_Object work_table, safe_codings;
6657 /* The text contains non-ASCII characters. */ 6508 /* The text contains non-ASCII characters. */
6658 6509
6659 work_table = Fmake_char_table (Qchar_coding_system, Qnil); 6510 work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6660 safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars)); 6511 safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6661 6512
6662 safe_codings = find_safe_codings_2 (p1, p1end, safe_codings, work_table, 6513 safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6514 &single_byte_char_found);
6515 if (p2 < p2end)
6516 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6663 &single_byte_char_found); 6517 &single_byte_char_found);
6664 if (p2 < p2end)
6665 safe_codings = find_safe_codings_2 (p2, p2end, safe_codings, work_table,
6666 &single_byte_char_found);
6667 if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) 6518 if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6668 safe_codings = Qt; 6519 safe_codings = Qt;
6669 else 6520 else
6670 { 6521 {
6671 /* Turn safe_codings to a list of coding systems... */ 6522 /* Turn safe_codings to a list of coding systems... */
7532 /* Intern this now in case it isn't already done. 7383 /* Intern this now in case it isn't already done.
7533 Setting this variable twice is harmless. 7384 Setting this variable twice is harmless.
7534 But don't staticpro it here--that is done in alloc.c. */ 7385 But don't staticpro it here--that is done in alloc.c. */
7535 Qchar_table_extra_slots = intern ("char-table-extra-slots"); 7386 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7536 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); 7387 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7537 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2)); 7388 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7538 7389
7539 Qvalid_codes = intern ("valid-codes"); 7390 Qvalid_codes = intern ("valid-codes");
7540 staticpro (&Qvalid_codes); 7391 staticpro (&Qvalid_codes);
7541 7392
7542 Qemacs_mule = intern ("emacs-mule"); 7393 Qemacs_mule = intern ("emacs-mule");
7550 defsubr (&Sread_non_nil_coding_system); 7401 defsubr (&Sread_non_nil_coding_system);
7551 defsubr (&Scheck_coding_system); 7402 defsubr (&Scheck_coding_system);
7552 defsubr (&Sdetect_coding_region); 7403 defsubr (&Sdetect_coding_region);
7553 defsubr (&Sdetect_coding_string); 7404 defsubr (&Sdetect_coding_string);
7554 defsubr (&Sfind_coding_systems_region_internal); 7405 defsubr (&Sfind_coding_systems_region_internal);
7555 defsubr (&Sfind_coding_systems_region_internal_2);
7556 defsubr (&Sunencodable_char_position); 7406 defsubr (&Sunencodable_char_position);
7557 defsubr (&Sdecode_coding_region); 7407 defsubr (&Sdecode_coding_region);
7558 defsubr (&Sencode_coding_region); 7408 defsubr (&Sencode_coding_region);
7559 defsubr (&Sdecode_coding_string); 7409 defsubr (&Sdecode_coding_string);
7560 defsubr (&Sencode_coding_string); 7410 defsubr (&Sencode_coding_string);
7772 called even if `coding-system-for-write' is non-nil. The command 7622 called even if `coding-system-for-write' is non-nil. The command
7773 `universal-coding-system-argument' binds this variable to t temporarily. */); 7623 `universal-coding-system-argument' binds this variable to t temporarily. */);
7774 coding_system_require_warning = 0; 7624 coding_system_require_warning = 0;
7775 7625
7776 7626
7777 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
7778 doc: /* Char-table containing safe coding systems of each characters.
7779 Each element doesn't include such generic coding systems that can
7780 encode any characters. They are in the first extra slot. */);
7781 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
7782
7783 DEFVAR_BOOL ("inhibit-iso-escape-detection", 7627 DEFVAR_BOOL ("inhibit-iso-escape-detection",
7784 &inhibit_iso_escape_detection, 7628 &inhibit_iso_escape_detection,
7785 doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection. 7629 doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7786 7630
7787 By default, on reading a file, Emacs tries to detect how the text is 7631 By default, on reading a file, Emacs tries to detect how the text is