comparison src/coding.c @ 88155:d7ddb3e565de

sync with trunk
author Henrik Enberg <henrik.enberg@telia.com>
date Mon, 16 Jan 2006 00:03:54 +0000
parents 23a1cea22d13
children
comparison
equal deleted inserted replaced
88154:8ce476d3ba36 88155:d7ddb3e565de
1 /* Coding system handler (conversion, detection, and etc). 1 /* Coding system handler (conversion, detection, and etc).
2 Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN. 2 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Licensed to the Free Software Foundation. 3 Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
4 Copyright (C) 2001,2002 Free Software Foundation, Inc. 4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
5 6
6 This file is part of GNU Emacs. 7 This file is part of GNU Emacs.
7 8
8 GNU Emacs is free software; you can redistribute it and/or modify 9 GNU Emacs is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by 10 it under the terms of the GNU General Public License as published by
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details. 17 GNU General Public License for more details.
17 18
18 You should have received a copy of the GNU General Public License 19 You should have received a copy of the GNU General Public License
19 along with GNU Emacs; see the file COPYING. If not, write to 20 along with GNU Emacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02111-1307, USA. */ 22 Boston, MA 02110-1301, USA. */
22 23
23 /*** TABLE OF CONTENTS *** 24 /*** TABLE OF CONTENTS ***
24 25
25 0. General comments 26 0. General comments
26 1. Preamble 27 1. Preamble
145 Below is a template for these functions. */ 146 Below is a template for these functions. */
146 #if 0 147 #if 0
147 static void 148 static void
148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) 149 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
149 struct coding_system *coding; 150 struct coding_system *coding;
150 unsigned char *source, *destination; 151 const unsigned char *source;
152 unsigned char *destination;
151 int src_bytes, dst_bytes; 153 int src_bytes, dst_bytes;
152 { 154 {
153 ... 155 ...
154 } 156 }
155 #endif 157 #endif
343 #include "charset.h" 345 #include "charset.h"
344 #include "composite.h" 346 #include "composite.h"
345 #include "ccl.h" 347 #include "ccl.h"
346 #include "coding.h" 348 #include "coding.h"
347 #include "window.h" 349 #include "window.h"
350 #include "intervals.h"
348 351
349 #else /* not emacs */ 352 #else /* not emacs */
350 353
351 #include "mulelib.h" 354 #include "mulelib.h"
352 355
359 Lisp_Object Qcoding_system_history; 362 Lisp_Object Qcoding_system_history;
360 Lisp_Object Qsafe_chars; 363 Lisp_Object Qsafe_chars;
361 Lisp_Object Qvalid_codes; 364 Lisp_Object Qvalid_codes;
362 365
363 extern Lisp_Object Qinsert_file_contents, Qwrite_region; 366 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
364 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; 367 Lisp_Object Qcall_process, Qcall_process_region;
365 Lisp_Object Qstart_process, Qopen_network_stream; 368 Lisp_Object Qstart_process, Qopen_network_stream;
366 Lisp_Object Qtarget_idx; 369 Lisp_Object Qtarget_idx;
370
371 /* If a symbol has this property, evaluate the value to define the
372 symbol as a coding system. */
373 Lisp_Object Qcoding_system_define_form;
367 374
368 Lisp_Object Vselect_safe_coding_system_function; 375 Lisp_Object Vselect_safe_coding_system_function;
369 376
370 int coding_system_require_warning; 377 int coding_system_require_warning;
371 378
396 Lisp_Object Qcoding_system_p, Qcoding_system_error; 403 Lisp_Object Qcoding_system_p, Qcoding_system_error;
397 404
398 /* Coding system emacs-mule and raw-text are for converting only 405 /* Coding system emacs-mule and raw-text are for converting only
399 end-of-line format. */ 406 end-of-line format. */
400 Lisp_Object Qemacs_mule, Qraw_text; 407 Lisp_Object Qemacs_mule, Qraw_text;
408
409 Lisp_Object Qutf_8;
401 410
402 /* Coding-systems are handed between Emacs Lisp programs and C internal 411 /* Coding-systems are handed between Emacs Lisp programs and C internal
403 routines by the following three variables. */ 412 routines by the following three variables. */
404 /* Coding-system for reading files and receiving data from process. */ 413 /* Coding-system for reading files and receiving data from process. */
405 Lisp_Object Vcoding_system_for_read; 414 Lisp_Object Vcoding_system_for_read;
504 pre-write-conversion functions. Usually the value is zero, but it 513 pre-write-conversion functions. Usually the value is zero, but it
505 is set to 1 temporarily while such functions are running. This is 514 is set to 1 temporarily while such functions are running. This is
506 to avoid infinite recursive call. */ 515 to avoid infinite recursive call. */
507 static int inhibit_pre_post_conversion; 516 static int inhibit_pre_post_conversion;
508 517
509 /* Char-table containing safe coding systems of each character. */
510 Lisp_Object Vchar_coding_system_table;
511 Lisp_Object Qchar_coding_system; 518 Lisp_Object Qchar_coding_system;
512 519
513 /* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check 520 /* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check
514 its validity. */ 521 its validity. */
515 522
686 data[2] = cmp_data->char_offset + end; \ 693 data[2] = cmp_data->char_offset + end; \
687 } while (0) 694 } while (0)
688 695
689 /* Record one COMPONENT (alternate character or composition rule). */ 696 /* Record one COMPONENT (alternate character or composition rule). */
690 697
691 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ 698 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \
692 (coding->cmp_data->data[coding->cmp_data->used++] = component) 699 do { \
700 coding->cmp_data->data[coding->cmp_data->used++] = component; \
701 if (coding->cmp_data->used - coding->cmp_data_start \
702 == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \
703 { \
704 CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \
705 coding->composing = COMPOSITION_NO; \
706 } \
707 } while (0)
693 708
694 709
695 /* Get one byte from a data pointed by SRC and increment SRC. If SRC 710 /* Get one byte from a data pointed by SRC and increment SRC. If SRC
696 is not less than SRC_END, return -1 without incrementing Src. */ 711 is not less than SRC_END, return -1 without incrementing Src. */
697 712
704 sequence. If no valid character is found, set C to -1. */ 719 sequence. If no valid character is found, set C to -1. */
705 720
706 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \ 721 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \
707 do { \ 722 do { \
708 int bytes; \ 723 int bytes; \
709 \ 724 \
710 c = SAFE_ONE_MORE_BYTE (); \ 725 c = SAFE_ONE_MORE_BYTE (); \
711 if (c < 0) \ 726 if (c < 0) \
712 break; \ 727 break; \
713 if (CHAR_HEAD_P (c)) \ 728 if (CHAR_HEAD_P (c)) \
714 c = -1; \ 729 c = -1; \
735 c = SAFE_ONE_MORE_BYTE (); \ 750 c = SAFE_ONE_MORE_BYTE (); \
736 if (c < 0) \ 751 if (c < 0) \
737 break; \ 752 break; \
738 *p++ = c; \ 753 *p++ = c; \
739 } \ 754 } \
740 if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \ 755 if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \
756 || (coding->flags /* We are recovering a file. */ \
757 && p0[0] == LEADING_CODE_8_BIT_CONTROL \
758 && ! CHAR_HEAD_P (p0[1]))) \
741 c = STRING_CHAR (p0, bytes); \ 759 c = STRING_CHAR (p0, bytes); \
742 else \ 760 else \
743 c = -1; \ 761 c = -1; \
744 } \ 762 } \
745 else \ 763 else \
782 */ 800 */
783 static INLINE int 801 static INLINE int
784 decode_composition_emacs_mule (coding, src, src_end, 802 decode_composition_emacs_mule (coding, src, src_end,
785 destination, dst_end, dst_bytes) 803 destination, dst_end, dst_bytes)
786 struct coding_system *coding; 804 struct coding_system *coding;
787 unsigned char *src, *src_end, **destination, *dst_end; 805 const unsigned char *src, *src_end;
806 unsigned char **destination, *dst_end;
788 int dst_bytes; 807 int dst_bytes;
789 { 808 {
790 unsigned char *dst = *destination; 809 unsigned char *dst = *destination;
791 int method, data_len, nchars; 810 int method, data_len, nchars;
792 unsigned char *src_base = src++; 811 const unsigned char *src_base = src++;
793 /* Store components of composition. */ 812 /* Store components of composition. */
794 int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; 813 int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
795 int ncomponent; 814 int ncomponent;
796 /* Store multibyte form of characters to be composed. This is for 815 /* Store multibyte form of characters to be composed. This is for
797 Emacs 20 style composition sequence. */ 816 Emacs 20 style composition sequence. */
839 c = COMPOSITION_ENCODE_RULE (gref, nref); 858 c = COMPOSITION_ENCODE_RULE (gref, nref);
840 } 859 }
841 else 860 else
842 { 861 {
843 int bytes; 862 int bytes;
844 if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) 863 if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
864 || (coding->flags /* We are recovering a file. */
865 && src[0] == LEADING_CODE_8_BIT_CONTROL
866 && ! CHAR_HEAD_P (src[1])))
845 c = STRING_CHAR (src, bytes); 867 c = STRING_CHAR (src, bytes);
846 else 868 else
847 c = *src, bytes = 1; 869 c = *src, bytes = 1;
848 src += bytes; 870 src += bytes;
849 } 871 }
925 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 947 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
926 948
927 static void 949 static void
928 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) 950 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
929 struct coding_system *coding; 951 struct coding_system *coding;
930 unsigned char *source, *destination; 952 const unsigned char *source;
953 unsigned char *destination;
931 int src_bytes, dst_bytes; 954 int src_bytes, dst_bytes;
932 { 955 {
933 unsigned char *src = source; 956 const unsigned char *src = source;
934 unsigned char *src_end = source + src_bytes; 957 const unsigned char *src_end = source + src_bytes;
935 unsigned char *dst = destination; 958 unsigned char *dst = destination;
936 unsigned char *dst_end = destination + dst_bytes; 959 unsigned char *dst_end = destination + dst_bytes;
937 /* SRC_BASE remembers the start position in source in each loop. 960 /* SRC_BASE remembers the start position in source in each loop.
938 The loop will be exited when there's not enough source code, or 961 The loop will be exited when there's not enough source code, or
939 when there's not enough destination area to produce a 962 when there's not enough destination area to produce a
940 character. */ 963 character. */
941 unsigned char *src_base; 964 const unsigned char *src_base;
942 965
943 coding->produced_char = 0; 966 coding->produced_char = 0;
944 while ((src_base = src) < src_end) 967 while ((src_base = src) < src_end)
945 { 968 {
946 unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p; 969 unsigned char tmp[MAX_MULTIBYTE_LENGTH];
970 const unsigned char *p;
947 int bytes; 971 int bytes;
948 972
949 if (*src == '\r') 973 if (*src == '\r')
950 { 974 {
951 int c = *src++; 975 int c = *src++;
993 } 1017 }
994 bytes = CHAR_STRING (*src, tmp); 1018 bytes = CHAR_STRING (*src, tmp);
995 p = tmp; 1019 p = tmp;
996 src++; 1020 src++;
997 } 1021 }
998 else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) 1022 else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1023 || (coding->flags /* We are recovering a file. */
1024 && src[0] == LEADING_CODE_8_BIT_CONTROL
1025 && ! CHAR_HEAD_P (src[1])))
999 { 1026 {
1000 p = src; 1027 p = src;
1001 src += bytes; 1028 src += bytes;
1002 } 1029 }
1003 else 1030 else
1004 { 1031 {
1005 bytes = CHAR_STRING (*src, tmp); 1032 int i, c;
1006 p = tmp; 1033
1034 bytes = BYTES_BY_CHAR_HEAD (*src);
1007 src++; 1035 src++;
1036 for (i = 1; i < bytes; i++)
1037 {
1038 ONE_MORE_BYTE (c);
1039 if (CHAR_HEAD_P (c))
1040 break;
1041 }
1042 if (i < bytes)
1043 {
1044 bytes = CHAR_STRING (*src_base, tmp);
1045 p = tmp;
1046 src = src_base + 1;
1047 }
1048 else
1049 {
1050 p = src_base;
1051 }
1008 } 1052 }
1009 if (dst + bytes >= (dst_bytes ? dst_end : src)) 1053 if (dst + bytes >= (dst_bytes ? dst_end : src))
1010 { 1054 {
1011 coding->result = CODING_FINISH_INSUFFICIENT_DST; 1055 coding->result = CODING_FINISH_INSUFFICIENT_DST;
1012 break; 1056 break;
1075 unsigned char *, int, int)); 1119 unsigned char *, int, int));
1076 1120
1077 static void 1121 static void
1078 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) 1122 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1079 struct coding_system *coding; 1123 struct coding_system *coding;
1080 unsigned char *source, *destination; 1124 const unsigned char *source;
1125 unsigned char *destination;
1081 int src_bytes, dst_bytes; 1126 int src_bytes, dst_bytes;
1082 { 1127 {
1083 unsigned char *src = source; 1128 const unsigned char *src = source;
1084 unsigned char *src_end = source + src_bytes; 1129 const unsigned char *src_end = source + src_bytes;
1085 unsigned char *dst = destination; 1130 unsigned char *dst = destination;
1086 unsigned char *dst_end = destination + dst_bytes; 1131 unsigned char *dst_end = destination + dst_bytes;
1087 unsigned char *src_base; 1132 const unsigned char *src_base;
1088 int c; 1133 int c;
1089 int char_offset; 1134 int char_offset;
1090 int *data; 1135 int *data;
1091 1136
1092 Lisp_Object translation_table; 1137 Lisp_Object translation_table;
1124 EMIT_TWO_BYTES ('\r', c); 1169 EMIT_TWO_BYTES ('\r', c);
1125 else 1170 else
1126 EMIT_ONE_BYTE ('\r'); 1171 EMIT_ONE_BYTE ('\r');
1127 } 1172 }
1128 else if (SINGLE_BYTE_CHAR_P (c)) 1173 else if (SINGLE_BYTE_CHAR_P (c))
1129 EMIT_ONE_BYTE (c); 1174 {
1175 if (coding->flags && ! ASCII_BYTE_P (c))
1176 {
1177 /* As we are auto saving, retain the multibyte form for
1178 8-bit chars. */
1179 unsigned char buf[MAX_MULTIBYTE_LENGTH];
1180 int bytes = CHAR_STRING (c, buf);
1181
1182 if (bytes == 1)
1183 EMIT_ONE_BYTE (buf[0]);
1184 else
1185 EMIT_TWO_BYTES (buf[0], buf[1]);
1186 }
1187 else
1188 EMIT_ONE_BYTE (c);
1189 }
1130 else 1190 else
1131 EMIT_BYTES (src_base, src); 1191 EMIT_BYTES (src_base, src);
1132 coding->consumed_char++; 1192 coding->consumed_char++;
1133 } 1193 }
1134 label_end_of_loop: 1194 label_end_of_loop:
1326 charset) \ 1386 charset) \
1327 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) 1387 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1328 1388
1329 #define SHIFT_OUT_OK(idx) \ 1389 #define SHIFT_OUT_OK(idx) \
1330 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) 1390 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1391
1392 #define COMPOSITION_OK(idx) \
1393 (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1331 1394
1332 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 1395 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1333 Check if a text is encoded in ISO2022. If it is, return an 1396 Check if a text is encoded in ISO2022. If it is, return an
1334 integer in which appropriate flag bits any of: 1397 integer in which appropriate flag bits any of:
1335 CODING_CATEGORY_MASK_ISO_7 1398 CODING_CATEGORY_MASK_ISO_7
1404 break; 1467 break;
1405 } 1468 }
1406 else if (c >= '0' && c <= '4') 1469 else if (c >= '0' && c <= '4')
1407 { 1470 {
1408 /* ESC <Fp> for start/end composition. */ 1471 /* ESC <Fp> for start/end composition. */
1409 mask_found |= CODING_CATEGORY_MASK_ISO; 1472 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1473 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1474 else
1475 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1476 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1477 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1478 else
1479 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1480 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1481 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1482 else
1483 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1484 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1485 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1486 else
1487 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1488 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1489 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1490 else
1491 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1492 if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1493 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1494 else
1495 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1410 break; 1496 break;
1411 } 1497 }
1412 else 1498 else
1413 /* Invalid escape sequence. Just ignore. */ 1499 /* Invalid escape sequence. Just ignore. */
1414 break; 1500 break;
1624 cmp_data->next = NULL; 1710 cmp_data->next = NULL;
1625 if (coding->cmp_data) 1711 if (coding->cmp_data)
1626 coding->cmp_data->next = cmp_data; 1712 coding->cmp_data->next = cmp_data;
1627 coding->cmp_data = cmp_data; 1713 coding->cmp_data = cmp_data;
1628 coding->cmp_data_start = 0; 1714 coding->cmp_data_start = 0;
1715 coding->composing = COMPOSITION_NO;
1629 } 1716 }
1630 1717
1631 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. 1718 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1632 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 1719 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1633 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 1720 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1727 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 1814 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1728 1815
1729 static void 1816 static void
1730 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) 1817 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1731 struct coding_system *coding; 1818 struct coding_system *coding;
1732 unsigned char *source, *destination; 1819 const unsigned char *source;
1820 unsigned char *destination;
1733 int src_bytes, dst_bytes; 1821 int src_bytes, dst_bytes;
1734 { 1822 {
1735 unsigned char *src = source; 1823 const unsigned char *src = source;
1736 unsigned char *src_end = source + src_bytes; 1824 const unsigned char *src_end = source + src_bytes;
1737 unsigned char *dst = destination; 1825 unsigned char *dst = destination;
1738 unsigned char *dst_end = destination + dst_bytes; 1826 unsigned char *dst_end = destination + dst_bytes;
1739 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 1827 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1740 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1828 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1741 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); 1829 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1742 /* SRC_BASE remembers the start position in source in each loop. 1830 /* SRC_BASE remembers the start position in source in each loop.
1743 The loop will be exited when there's not enough source code 1831 The loop will be exited when there's not enough source code
1744 (within macro ONE_MORE_BYTE), or when there's not enough 1832 (within macro ONE_MORE_BYTE), or when there's not enough
1745 destination area to produce a character (within macro 1833 destination area to produce a character (within macro
1746 EMIT_CHAR). */ 1834 EMIT_CHAR). */
1747 unsigned char *src_base; 1835 const unsigned char *src_base;
1748 int c, charset; 1836 int c, charset;
1749 Lisp_Object translation_table; 1837 Lisp_Object translation_table;
1750 Lisp_Object safe_chars; 1838 Lisp_Object safe_chars;
1751 1839
1752 safe_chars = coding_safe_chars (coding->symbol); 1840 safe_chars = coding_safe_chars (coding->symbol);
1762 1850
1763 coding->result = CODING_FINISH_NORMAL; 1851 coding->result = CODING_FINISH_NORMAL;
1764 1852
1765 while (1) 1853 while (1)
1766 { 1854 {
1767 int c1, c2; 1855 int c1, c2 = 0;
1768 1856
1769 src_base = src; 1857 src_base = src;
1770 ONE_MORE_BYTE (c1); 1858 ONE_MORE_BYTE (c1);
1771 1859
1772 /* We produce no character or one character. */ 1860 /* We produce no character or one character. */
2002 default: 2090 default:
2003 goto label_invalid_code; 2091 goto label_invalid_code;
2004 } 2092 }
2005 continue; 2093 continue;
2006 2094
2095 case '%':
2096 if (COMPOSING_P (coding))
2097 DECODE_COMPOSITION_END ('1');
2098 ONE_MORE_BYTE (c1);
2099 if (c1 == '/')
2100 {
2101 /* CTEXT extended segment:
2102 ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2103 We keep these bytes as is for the moment.
2104 They may be decoded by post-read-conversion. */
2105 int dim, M, L;
2106 int size, required;
2107 int produced_chars;
2108
2109 ONE_MORE_BYTE (dim);
2110 ONE_MORE_BYTE (M);
2111 ONE_MORE_BYTE (L);
2112 size = ((M - 128) * 128) + (L - 128);
2113 required = 8 + size * 2;
2114 if (dst + required > (dst_bytes ? dst_end : src))
2115 goto label_end_of_loop;
2116 *dst++ = ISO_CODE_ESC;
2117 *dst++ = '%';
2118 *dst++ = '/';
2119 *dst++ = dim;
2120 produced_chars = 4;
2121 dst += CHAR_STRING (M, dst), produced_chars++;
2122 dst += CHAR_STRING (L, dst), produced_chars++;
2123 while (size-- > 0)
2124 {
2125 ONE_MORE_BYTE (c1);
2126 dst += CHAR_STRING (c1, dst), produced_chars++;
2127 }
2128 coding->produced_char += produced_chars;
2129 }
2130 else if (c1 == 'G')
2131 {
2132 unsigned char *d = dst;
2133 int produced_chars;
2134
2135 /* XFree86 extension for embedding UTF-8 in CTEXT:
2136 ESC % G --UTF-8-BYTES-- ESC % @
2137 We keep these bytes as is for the moment.
2138 They may be decoded by post-read-conversion. */
2139 if (d + 6 > (dst_bytes ? dst_end : src))
2140 goto label_end_of_loop;
2141 *d++ = ISO_CODE_ESC;
2142 *d++ = '%';
2143 *d++ = 'G';
2144 produced_chars = 3;
2145 while (d + 1 < (dst_bytes ? dst_end : src))
2146 {
2147 ONE_MORE_BYTE (c1);
2148 if (c1 == ISO_CODE_ESC
2149 && src + 1 < src_end
2150 && src[0] == '%'
2151 && src[1] == '@')
2152 {
2153 src += 2;
2154 break;
2155 }
2156 d += CHAR_STRING (c1, d), produced_chars++;
2157 }
2158 if (d + 3 > (dst_bytes ? dst_end : src))
2159 goto label_end_of_loop;
2160 *d++ = ISO_CODE_ESC;
2161 *d++ = '%';
2162 *d++ = '@';
2163 dst = d;
2164 coding->produced_char += produced_chars + 3;
2165 }
2166 else
2167 goto label_invalid_code;
2168 continue;
2169
2007 default: 2170 default:
2008 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) 2171 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2009 goto label_invalid_code; 2172 goto label_invalid_code;
2010 if (c1 >= 0x28 && c1 <= 0x2B) 2173 if (c1 >= 0x28 && c1 <= 0x2B)
2011 { /* designation of DIMENSION1_CHARS94 character set */ 2174 { /* designation of DIMENSION1_CHARS94 character set */
2044 coding->errors++; 2207 coding->errors++;
2045 if (COMPOSING_P (coding)) 2208 if (COMPOSING_P (coding))
2046 DECODE_COMPOSITION_END ('1'); 2209 DECODE_COMPOSITION_END ('1');
2047 src = src_base; 2210 src = src_base;
2048 c = *src++; 2211 c = *src++;
2212 if (! NILP (translation_table))
2213 c = translate_char (translation_table, c, 0, 0, 0);
2049 EMIT_CHAR (c); 2214 EMIT_CHAR (c);
2050 } 2215 }
2051 2216
2052 label_end_of_loop: 2217 label_end_of_loop:
2053 coding->consumed = coding->consumed_char = src_base - source; 2218 coding->consumed = coding->consumed_char = src_base - source;
2267 } while (0) 2432 } while (0)
2268 2433
2269 2434
2270 /* Instead of encoding character C, produce one or two `?'s. */ 2435 /* Instead of encoding character C, produce one or two `?'s. */
2271 2436
2272 #define ENCODE_UNSAFE_CHARACTER(c) \ 2437 #define ENCODE_UNSAFE_CHARACTER(c) \
2273 do { \ 2438 do { \
2274 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ 2439 ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
2275 if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \ 2440 if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
2276 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ 2441 ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER); \
2277 } while (0) 2442 } while (0)
2278 2443
2279 2444
2280 /* Produce designation and invocation codes at a place pointed by DST 2445 /* Produce designation and invocation codes at a place pointed by DST
2281 to use CHARSET. The element `spec.iso2022' of *CODING is updated. 2446 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
2438 2603
2439 static unsigned char * 2604 static unsigned char *
2440 encode_designation_at_bol (coding, translation_table, src, src_end, dst) 2605 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2441 struct coding_system *coding; 2606 struct coding_system *coding;
2442 Lisp_Object translation_table; 2607 Lisp_Object translation_table;
2443 unsigned char *src, *src_end, *dst; 2608 const unsigned char *src, *src_end;
2609 unsigned char *dst;
2444 { 2610 {
2445 int charset, c, found = 0, reg; 2611 int charset, c, found = 0, reg;
2446 /* Table of charsets to be designated to each graphic register. */ 2612 /* Table of charsets to be designated to each graphic register. */
2447 int r[4]; 2613 int r[4];
2448 2614
2479 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ 2645 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
2480 2646
2481 static void 2647 static void
2482 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) 2648 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2483 struct coding_system *coding; 2649 struct coding_system *coding;
2484 unsigned char *source, *destination; 2650 const unsigned char *source;
2651 unsigned char *destination;
2485 int src_bytes, dst_bytes; 2652 int src_bytes, dst_bytes;
2486 { 2653 {
2487 unsigned char *src = source; 2654 const unsigned char *src = source;
2488 unsigned char *src_end = source + src_bytes; 2655 const unsigned char *src_end = source + src_bytes;
2489 unsigned char *dst = destination; 2656 unsigned char *dst = destination;
2490 unsigned char *dst_end = destination + dst_bytes; 2657 unsigned char *dst_end = destination + dst_bytes;
2491 /* Since the maximum bytes produced by each loop is 20, we subtract 19 2658 /* Since the maximum bytes produced by each loop is 20, we subtract 19
2492 from DST_END to assure overflow checking is necessary only at the 2659 from DST_END to assure overflow checking is necessary only at the
2493 head of loop. */ 2660 head of loop. */
2495 /* SRC_BASE remembers the start position in source in each loop. 2662 /* SRC_BASE remembers the start position in source in each loop.
2496 The loop will be exited when there's not enough source text to 2663 The loop will be exited when there's not enough source text to
2497 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when 2664 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2498 there's not enough destination area to produce encoded codes 2665 there's not enough destination area to produce encoded codes
2499 (within macro EMIT_BYTES). */ 2666 (within macro EMIT_BYTES). */
2500 unsigned char *src_base; 2667 const unsigned char *src_base;
2501 int c; 2668 int c;
2502 Lisp_Object translation_table; 2669 Lisp_Object translation_table;
2503 Lisp_Object safe_chars; 2670 Lisp_Object safe_chars;
2671
2672 if (coding->flags & CODING_FLAG_ISO_SAFE)
2673 coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2504 2674
2505 safe_chars = coding_safe_chars (coding->symbol); 2675 safe_chars = coding_safe_chars (coding->symbol);
2506 2676
2507 if (NILP (Venable_character_translation)) 2677 if (NILP (Venable_character_translation))
2508 translation_table = Qnil; 2678 translation_table = Qnil;
2567 ENCODE_COMPOSITION_RULE (c); 2737 ENCODE_COMPOSITION_RULE (c);
2568 coding->composition_rule_follows = 0; 2738 coding->composition_rule_follows = 0;
2569 } 2739 }
2570 else 2740 else
2571 { 2741 {
2572 if (coding->flags & CODING_FLAG_ISO_SAFE 2742 if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2573 && ! CODING_SAFE_CHAR_P (safe_chars, c)) 2743 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2574 ENCODE_UNSAFE_CHARACTER (c); 2744 ENCODE_UNSAFE_CHARACTER (c);
2575 else 2745 else
2576 ENCODE_ISO_CHARACTER (c); 2746 ENCODE_ISO_CHARACTER (c);
2577 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) 2747 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2636 else if (SINGLE_BYTE_CHAR_P (c)) 2806 else if (SINGLE_BYTE_CHAR_P (c))
2637 { 2807 {
2638 *dst++ = c; 2808 *dst++ = c;
2639 coding->errors++; 2809 coding->errors++;
2640 } 2810 }
2641 else if (coding->flags & CODING_FLAG_ISO_SAFE 2811 else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2642 && ! CODING_SAFE_CHAR_P (safe_chars, c)) 2812 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2643 ENCODE_UNSAFE_CHARACTER (c); 2813 ENCODE_UNSAFE_CHARACTER (c);
2644 else 2814 else
2645 ENCODE_ISO_CHARACTER (c); 2815 ENCODE_ISO_CHARACTER (c);
2646 2816
2863 detect_coding_utf_16 (src, src_end, multibytep) 3033 detect_coding_utf_16 (src, src_end, multibytep)
2864 unsigned char *src, *src_end; 3034 unsigned char *src, *src_end;
2865 int multibytep; 3035 int multibytep;
2866 { 3036 {
2867 unsigned char c1, c2; 3037 unsigned char c1, c2;
2868 /* Dummy for TWO_MORE_BYTES. */ 3038 /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */
2869 struct coding_system dummy_coding; 3039 struct coding_system dummy_coding;
2870 struct coding_system *coding = &dummy_coding; 3040 struct coding_system *coding = &dummy_coding;
2871 3041
2872 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); 3042 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2873 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep); 3043 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2886 3056
2887 static void 3057 static void
2888 decode_coding_sjis_big5 (coding, source, destination, 3058 decode_coding_sjis_big5 (coding, source, destination,
2889 src_bytes, dst_bytes, sjis_p) 3059 src_bytes, dst_bytes, sjis_p)
2890 struct coding_system *coding; 3060 struct coding_system *coding;
2891 unsigned char *source, *destination; 3061 const unsigned char *source;
3062 unsigned char *destination;
2892 int src_bytes, dst_bytes; 3063 int src_bytes, dst_bytes;
2893 int sjis_p; 3064 int sjis_p;
2894 { 3065 {
2895 unsigned char *src = source; 3066 const unsigned char *src = source;
2896 unsigned char *src_end = source + src_bytes; 3067 const unsigned char *src_end = source + src_bytes;
2897 unsigned char *dst = destination; 3068 unsigned char *dst = destination;
2898 unsigned char *dst_end = destination + dst_bytes; 3069 unsigned char *dst_end = destination + dst_bytes;
2899 /* SRC_BASE remembers the start position in source in each loop. 3070 /* SRC_BASE remembers the start position in source in each loop.
2900 The loop will be exited when there's not enough source code 3071 The loop will be exited when there's not enough source code
2901 (within macro ONE_MORE_BYTE), or when there's not enough 3072 (within macro ONE_MORE_BYTE), or when there's not enough
2902 destination area to produce a character (within macro 3073 destination area to produce a character (within macro
2903 EMIT_CHAR). */ 3074 EMIT_CHAR). */
2904 unsigned char *src_base; 3075 const unsigned char *src_base;
2905 Lisp_Object translation_table; 3076 Lisp_Object translation_table;
2906 3077
2907 if (NILP (Venable_character_translation)) 3078 if (NILP (Venable_character_translation))
2908 translation_table = Qnil; 3079 translation_table = Qnil;
2909 else 3080 else
2914 } 3085 }
2915 3086
2916 coding->produced_char = 0; 3087 coding->produced_char = 0;
2917 while (1) 3088 while (1)
2918 { 3089 {
2919 int c, charset, c1, c2; 3090 int c, charset, c1, c2 = 0;
2920 3091
2921 src_base = src; 3092 src_base = src;
2922 ONE_MORE_BYTE (c1); 3093 ONE_MORE_BYTE (c1);
2923 3094
2924 if (c1 < 0x80) 3095 if (c1 < 0x80)
3079 } 3250 }
3080 else if (charset == charset_katakana_jisx0201) 3251 else if (charset == charset_katakana_jisx0201)
3081 EMIT_ONE_BYTE (c1 | 0x80); 3252 EMIT_ONE_BYTE (c1 | 0x80);
3082 else if (charset == charset_latin_jisx0201) 3253 else if (charset == charset_latin_jisx0201)
3083 EMIT_ONE_BYTE (c1); 3254 EMIT_ONE_BYTE (c1);
3255 else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3256 {
3257 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3258 if (CHARSET_WIDTH (charset) > 1)
3259 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3260 }
3084 else 3261 else
3085 /* There's no way other than producing the internal 3262 /* There's no way other than producing the internal
3086 codes as is. */ 3263 codes as is. */
3087 EMIT_BYTES (src_base, src); 3264 EMIT_BYTES (src_base, src);
3088 } 3265 }
3091 if (charset == charset_big5_1 || charset == charset_big5_2) 3268 if (charset == charset_big5_1 || charset == charset_big5_2)
3092 { 3269 {
3093 ENCODE_BIG5 (charset, c1, c2, c1, c2); 3270 ENCODE_BIG5 (charset, c1, c2, c1, c2);
3094 EMIT_TWO_BYTES (c1, c2); 3271 EMIT_TWO_BYTES (c1, c2);
3095 } 3272 }
3273 else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3274 {
3275 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3276 if (CHARSET_WIDTH (charset) > 1)
3277 EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3278 }
3096 else 3279 else
3097 /* There's no way other than producing the internal 3280 /* There's no way other than producing the internal
3098 codes as is. */ 3281 codes as is. */
3099 EMIT_BYTES (src_base, src); 3282 EMIT_BYTES (src_base, src);
3100 } 3283 }
3147 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 3330 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3148 3331
3149 static void 3332 static void
3150 decode_eol (coding, source, destination, src_bytes, dst_bytes) 3333 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3151 struct coding_system *coding; 3334 struct coding_system *coding;
3152 unsigned char *source, *destination; 3335 const unsigned char *source;
3336 unsigned char *destination;
3153 int src_bytes, dst_bytes; 3337 int src_bytes, dst_bytes;
3154 { 3338 {
3155 unsigned char *src = source; 3339 const unsigned char *src = source;
3156 unsigned char *dst = destination; 3340 unsigned char *dst = destination;
3157 unsigned char *src_end = src + src_bytes; 3341 const unsigned char *src_end = src + src_bytes;
3158 unsigned char *dst_end = dst + dst_bytes; 3342 unsigned char *dst_end = dst + dst_bytes;
3159 Lisp_Object translation_table; 3343 Lisp_Object translation_table;
3160 /* SRC_BASE remembers the start position in source in each loop. 3344 /* SRC_BASE remembers the start position in source in each loop.
3161 The loop will be exited when there's not enough source code 3345 The loop will be exited when there's not enough source code
3162 (within macro ONE_MORE_BYTE), or when there's not enough 3346 (within macro ONE_MORE_BYTE), or when there's not enough
3163 destination area to produce a character (within macro 3347 destination area to produce a character (within macro
3164 EMIT_CHAR). */ 3348 EMIT_CHAR). */
3165 unsigned char *src_base; 3349 const unsigned char *src_base;
3166 int c; 3350 int c;
3167 3351
3168 translation_table = Qnil; 3352 translation_table = Qnil;
3169 switch (coding->eol_type) 3353 switch (coding->eol_type)
3170 { 3354 {
3873 `cn-big5' by default. 4057 `cn-big5' by default.
3874 4058
3875 o coding-category-utf-8 4059 o coding-category-utf-8
3876 4060
3877 The category for a coding system which has the same code range 4061 The category for a coding system which has the same code range
3878 as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp 4062 as UTF-8 (cf. RFC3629). Assigned the coding-system (Lisp
3879 symbol) `utf-8' by default. 4063 symbol) `utf-8' by default.
3880 4064
3881 o coding-category-utf-16-be 4065 o coding-category-utf-16-be
3882 4066
3883 The category for a coding system in which a text has an 4067 The category for a coding system in which a text has an
4358 int src_bytes; 4542 int src_bytes;
4359 { 4543 {
4360 int magnification; 4544 int magnification;
4361 4545
4362 if (coding->type == coding_type_ccl) 4546 if (coding->type == coding_type_ccl)
4363 magnification = coding->spec.ccl.encoder.buf_magnification; 4547 {
4548 magnification = coding->spec.ccl.encoder.buf_magnification;
4549 if (coding->eol_type == CODING_EOL_CRLF)
4550 magnification *= 2;
4551 }
4364 else if (CODING_REQUIRE_ENCODING (coding)) 4552 else if (CODING_REQUIRE_ENCODING (coding))
4365 magnification = 3; 4553 magnification = 3;
4366 else 4554 else
4367 magnification = 1; 4555 magnification = 1;
4368 4556
4374 { 4562 {
4375 int size; /* size of data. */ 4563 int size; /* size of data. */
4376 int on_stack; /* 1 if allocated by alloca. */ 4564 int on_stack; /* 1 if allocated by alloca. */
4377 unsigned char *data; 4565 unsigned char *data;
4378 }; 4566 };
4379
4380 /* Don't use alloca for allocating memory space larger than this, lest
4381 we overflow their stack. */
4382 #define MAX_ALLOCA 16*1024
4383 4567
4384 /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */ 4568 /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */
4385 #define allocate_conversion_buffer(buf, len) \ 4569 #define allocate_conversion_buffer(buf, len) \
4386 do { \ 4570 do { \
4387 if (len < MAX_ALLOCA) \ 4571 if (len < MAX_ALLOCA) \
4443 that, setup proper information in the structure CCL. */ 4627 that, setup proper information in the structure CCL. */
4444 ccl->eol_type = coding->eol_type; 4628 ccl->eol_type = coding->eol_type;
4445 if (ccl->eol_type ==CODING_EOL_UNDECIDED) 4629 if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4446 ccl->eol_type = CODING_EOL_LF; 4630 ccl->eol_type = CODING_EOL_LF;
4447 ccl->cr_consumed = coding->spec.ccl.cr_carryover; 4631 ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4448 } 4632 ccl->eight_bit_control = coding->dst_multibyte;
4633 }
4634 else
4635 ccl->eight_bit_control = 1;
4449 ccl->multibyte = coding->src_multibyte; 4636 ccl->multibyte = coding->src_multibyte;
4450 if (coding->spec.ccl.eight_bit_carryover[0] != 0) 4637 if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4451 { 4638 {
4452 /* Move carryover bytes to DESTINATION. */ 4639 /* Move carryover bytes to DESTINATION. */
4453 unsigned char *p = coding->spec.ccl.eight_bit_carryover; 4640 unsigned char *p = coding->spec.ccl.eight_bit_carryover;
5165 if (encodep) shrink_encoding_region (beg, end, coding, str); \ 5352 if (encodep) shrink_encoding_region (beg, end, coding, str); \
5166 else shrink_decoding_region (beg, end, coding, str); \ 5353 else shrink_decoding_region (beg, end, coding, str); \
5167 } \ 5354 } \
5168 } while (0) 5355 } while (0)
5169 5356
5357 /* ARG is a list (CODING BUFFER ...) where CODING is the value to
5358 store in Vlast_coding_system_used and the remaining elements are
5359 buffers to kill. */
5170 static Lisp_Object 5360 static Lisp_Object
5171 code_convert_region_unwind (dummy) 5361 code_convert_region_unwind (arg)
5172 Lisp_Object dummy; 5362 Lisp_Object arg;
5173 { 5363 {
5364 struct gcpro gcpro1;
5365 GCPRO1 (arg);
5366
5174 inhibit_pre_post_conversion = 0; 5367 inhibit_pre_post_conversion = 0;
5368 Vlast_coding_system_used = XCAR (arg);
5369 for (arg = XCDR (arg); ! NILP (arg); arg = XCDR (arg))
5370 Fkill_buffer (XCAR (arg));
5371
5372 UNGCPRO;
5175 return Qnil; 5373 return Qnil;
5176 } 5374 }
5177 5375
5178 /* Store information about all compositions in the range FROM and TO 5376 /* Store information about all compositions in the range FROM and TO
5179 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a 5377 of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a
5280 { 5478 {
5281 int *data = cmp_data->data + i; 5479 int *data = cmp_data->data + i;
5282 enum composition_method method = (enum composition_method) data[3]; 5480 enum composition_method method = (enum composition_method) data[3];
5283 Lisp_Object components; 5481 Lisp_Object components;
5284 5482
5483 if (data[0] < 0 || i + data[0] > cmp_data->used)
5484 /* Invalid composition data. */
5485 break;
5486
5285 if (method == COMPOSITION_RELATIVE) 5487 if (method == COMPOSITION_RELATIVE)
5286 components = Qnil; 5488 components = Qnil;
5287 else 5489 else
5288 { 5490 {
5289 int len = data[0] - 4, j; 5491 int len = data[0] - 4, j;
5290 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; 5492 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5291 5493
5494 if (method == COMPOSITION_WITH_RULE_ALTCHARS
5495 && len % 2 == 0)
5496 len --;
5497 if (len < 1)
5498 /* Invalid composition data. */
5499 break;
5292 for (j = 0; j < len; j++) 5500 for (j = 0; j < len; j++)
5293 args[j] = make_number (data[4 + j]); 5501 args[j] = make_number (data[4 + j]);
5294 components = (method == COMPOSITION_WITH_ALTCHARS 5502 components = (method == COMPOSITION_WITH_ALTCHARS
5295 ? Fstring (len, args) : Fvector (len, args)); 5503 ? Fstring (len, args)
5504 : Fvector (len, args));
5296 } 5505 }
5297 compose_text (data[1], data[2], components, Qnil, obj); 5506 compose_text (data[1], data[2], components, Qnil, obj);
5298 } 5507 }
5299 cmp_data = cmp_data->next; 5508 cmp_data = cmp_data->next;
5300 } 5509 }
5410 /* The function in pre-write-conversion may put a new text in a 5619 /* The function in pre-write-conversion may put a new text in a
5411 new buffer. */ 5620 new buffer. */
5412 struct buffer *prev = current_buffer; 5621 struct buffer *prev = current_buffer;
5413 Lisp_Object new; 5622 Lisp_Object new;
5414 5623
5415 record_unwind_protect (code_convert_region_unwind, Qnil); 5624 record_unwind_protect (code_convert_region_unwind,
5625 Fcons (Vlast_coding_system_used, Qnil));
5416 /* We should not call any more pre-write/post-read-conversion 5626 /* We should not call any more pre-write/post-read-conversion
5417 functions while this pre-write-conversion is running. */ 5627 functions while this pre-write-conversion is running. */
5418 inhibit_pre_post_conversion = 1; 5628 inhibit_pre_post_conversion = 1;
5419 call2 (coding->pre_write_conversion, 5629 call2 (coding->pre_write_conversion,
5420 make_number (from), make_number (to)); 5630 make_number (from), make_number (to));
5461 coding_save_composition (coding, from, to, Fcurrent_buffer ()); 5671 coding_save_composition (coding, from, to, Fcurrent_buffer ());
5462 else 5672 else
5463 coding_allocate_composition_data (coding, from); 5673 coding_allocate_composition_data (coding, from);
5464 } 5674 }
5465 5675
5466 /* Try to skip the leading and trailing ASCIIs. */ 5676 /* Try to skip the leading and trailing ASCIIs. We can't skip them
5467 if (coding->type != coding_type_ccl) 5677 if we must run a CCL program or there are compositions to
5678 encode. */
5679 if (coding->type != coding_type_ccl
5680 && (! coding->cmp_data || coding->cmp_data->used == 0))
5468 { 5681 {
5469 int from_byte_orig = from_byte, to_byte_orig = to_byte; 5682 int from_byte_orig = from_byte, to_byte_orig = to_byte;
5470 5683
5471 if (from < GPT && GPT < to) 5684 if (from < GPT && GPT < to)
5472 move_gap_both (from, from_byte); 5685 move_gap_both (from, from_byte);
5478 coding->produced = len_byte; 5691 coding->produced = len_byte;
5479 coding->produced_char = len; 5692 coding->produced_char = len;
5480 if (!replace) 5693 if (!replace)
5481 /* We must record and adjust for this new text now. */ 5694 /* We must record and adjust for this new text now. */
5482 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); 5695 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5696 coding_free_composition_data (coding);
5483 return 0; 5697 return 0;
5484 } 5698 }
5485 5699
5486 head_skip = from_byte - from_byte_orig; 5700 head_skip = from_byte - from_byte_orig;
5487 tail_skip = to_byte_orig - to_byte; 5701 tail_skip = to_byte_orig - to_byte;
5688 NEW bytes (coding->produced). To convert the remaining 5902 NEW bytes (coding->produced). To convert the remaining
5689 LEN bytes, we may need REQUIRE bytes of gap, where: 5903 LEN bytes, we may need REQUIRE bytes of gap, where:
5690 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) 5904 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5691 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG 5905 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5692 Here, we are sure that NEW >= ORIG. */ 5906 Here, we are sure that NEW >= ORIG. */
5693 float ratio;
5694 5907
5695 if (coding->produced <= coding->consumed) 5908 if (coding->produced <= coding->consumed)
5696 { 5909 {
5697 /* This happens because of CCL-based coding system with 5910 /* This happens because of CCL-based coding system with
5698 eol-type CRLF. */ 5911 eol-type CRLF. */
5699 require = 0; 5912 require = 0;
5700 } 5913 }
5701 else 5914 else
5702 { 5915 {
5703 ratio = (coding->produced - coding->consumed) / coding->consumed; 5916 float ratio = coding->produced - coding->consumed;
5917 ratio /= coding->consumed;
5704 require = len_byte * ratio; 5918 require = len_byte * ratio;
5705 } 5919 }
5706 first = 0; 5920 first = 0;
5707 } 5921 }
5708 if ((src - dst) < (require + 2000)) 5922 if ((src - dst) < (require + 2000))
5768 5982
5769 if (! inhibit_pre_post_conversion 5983 if (! inhibit_pre_post_conversion
5770 && ! encodep && ! NILP (coding->post_read_conversion)) 5984 && ! encodep && ! NILP (coding->post_read_conversion))
5771 { 5985 {
5772 Lisp_Object val; 5986 Lisp_Object val;
5987 Lisp_Object saved_coding_system;
5773 5988
5774 if (from != PT) 5989 if (from != PT)
5775 TEMP_SET_PT_BOTH (from, from_byte); 5990 TEMP_SET_PT_BOTH (from, from_byte);
5776 prev_Z = Z; 5991 prev_Z = Z;
5777 record_unwind_protect (code_convert_region_unwind, Qnil); 5992 record_unwind_protect (code_convert_region_unwind,
5993 Fcons (Vlast_coding_system_used, Qnil));
5994 saved_coding_system = Vlast_coding_system_used;
5995 Vlast_coding_system_used = coding->symbol;
5778 /* We should not call any more pre-write/post-read-conversion 5996 /* We should not call any more pre-write/post-read-conversion
5779 functions while this post-read-conversion is running. */ 5997 functions while this post-read-conversion is running. */
5780 inhibit_pre_post_conversion = 1; 5998 inhibit_pre_post_conversion = 1;
5781 val = call1 (coding->post_read_conversion, make_number (inserted)); 5999 val = call1 (coding->post_read_conversion, make_number (inserted));
5782 inhibit_pre_post_conversion = 0; 6000 inhibit_pre_post_conversion = 0;
6001 coding->symbol = Vlast_coding_system_used;
6002 Vlast_coding_system_used = saved_coding_system;
5783 /* Discard the unwind protect. */ 6003 /* Discard the unwind protect. */
5784 specpdl_ptr--; 6004 specpdl_ptr--;
5785 CHECK_NUMBER (val); 6005 CHECK_NUMBER (val);
5786 inserted += Z - prev_Z; 6006 inserted += Z - prev_Z;
5787 } 6007 }
5809 } 6029 }
5810 6030
5811 return 0; 6031 return 0;
5812 } 6032 }
5813 6033
6034 /* Name (or base name) of work buffer for code conversion. */
6035 static Lisp_Object Vcode_conversion_workbuf_name;
6036
6037 /* Set the current buffer to the working buffer prepared for
6038 code-conversion. MULTIBYTE specifies the multibyteness of the
6039 buffer. Return the buffer we set if it must be killed after use.
6040 Otherwise return Qnil. */
6041
6042 static Lisp_Object
6043 set_conversion_work_buffer (multibyte)
6044 int multibyte;
6045 {
6046 Lisp_Object buffer, buffer_to_kill;
6047 struct buffer *buf;
6048
6049 buffer = Fget_buffer_create (Vcode_conversion_workbuf_name);
6050 buf = XBUFFER (buffer);
6051 if (buf == current_buffer)
6052 {
6053 /* As we are already in the work buffer, we must generate a new
6054 buffer for the work. */
6055 Lisp_Object name;
6056
6057 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6058 buffer = buffer_to_kill = Fget_buffer_create (name);
6059 buf = XBUFFER (buffer);
6060 }
6061 else
6062 buffer_to_kill = Qnil;
6063
6064 delete_all_overlays (buf);
6065 buf->directory = current_buffer->directory;
6066 buf->read_only = Qnil;
6067 buf->filename = Qnil;
6068 buf->undo_list = Qt;
6069 eassert (buf->overlays_before == NULL);
6070 eassert (buf->overlays_after == NULL);
6071 set_buffer_internal (buf);
6072 if (BEG != BEGV || Z != ZV)
6073 Fwiden ();
6074 del_range_2 (BEG, BEG_BYTE, Z, Z_BYTE, 0);
6075 buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6076 return buffer_to_kill;
6077 }
6078
5814 Lisp_Object 6079 Lisp_Object
5815 run_pre_post_conversion_on_str (str, coding, encodep) 6080 run_pre_post_conversion_on_str (str, coding, encodep)
5816 Lisp_Object str; 6081 Lisp_Object str;
5817 struct coding_system *coding; 6082 struct coding_system *coding;
5818 int encodep; 6083 int encodep;
5819 { 6084 {
5820 int count = SPECPDL_INDEX (); 6085 int count = SPECPDL_INDEX ();
5821 struct gcpro gcpro1, gcpro2; 6086 struct gcpro gcpro1, gcpro2;
5822 int multibyte = STRING_MULTIBYTE (str); 6087 int multibyte = STRING_MULTIBYTE (str);
5823 Lisp_Object buffer;
5824 struct buffer *buf;
5825 Lisp_Object old_deactivate_mark; 6088 Lisp_Object old_deactivate_mark;
6089 Lisp_Object buffer_to_kill;
6090 Lisp_Object unwind_arg;
5826 6091
5827 record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); 6092 record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5828 record_unwind_protect (code_convert_region_unwind, Qnil);
5829 /* It is not crucial to specbind this. */ 6093 /* It is not crucial to specbind this. */
5830 old_deactivate_mark = Vdeactivate_mark; 6094 old_deactivate_mark = Vdeactivate_mark;
5831 GCPRO2 (str, old_deactivate_mark); 6095 GCPRO2 (str, old_deactivate_mark);
5832 6096
5833 buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
5834 buf = XBUFFER (buffer);
5835
5836 buf->directory = current_buffer->directory;
5837 buf->read_only = Qnil;
5838 buf->filename = Qnil;
5839 buf->undo_list = Qt;
5840 buf->overlays_before = Qnil;
5841 buf->overlays_after = Qnil;
5842
5843 set_buffer_internal (buf);
5844 /* We must insert the contents of STR as is without 6097 /* We must insert the contents of STR as is without
5845 unibyte<->multibyte conversion. For that, we adjust the 6098 unibyte<->multibyte conversion. For that, we adjust the
5846 multibyteness of the working buffer to that of STR. */ 6099 multibyteness of the working buffer to that of STR. */
5847 Ferase_buffer (); 6100 buffer_to_kill = set_conversion_work_buffer (multibyte);
5848 buf->enable_multibyte_characters = multibyte ? Qt : Qnil; 6101 if (NILP (buffer_to_kill))
6102 unwind_arg = Fcons (Vlast_coding_system_used, Qnil);
6103 else
6104 unwind_arg = list2 (Vlast_coding_system_used, buffer_to_kill);
6105 record_unwind_protect (code_convert_region_unwind, unwind_arg);
5849 6106
5850 insert_from_string (str, 0, 0, 6107 insert_from_string (str, 0, 0,
5851 SCHARS (str), SBYTES (str), 0); 6108 SCHARS (str), SBYTES (str), 0);
5852 UNGCPRO; 6109 UNGCPRO;
5853 inhibit_pre_post_conversion = 1; 6110 inhibit_pre_post_conversion = 1;
5854 if (encodep) 6111 if (encodep)
5855 call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); 6112 {
6113 struct buffer *prev = current_buffer;
6114
6115 call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6116 if (prev != current_buffer)
6117 /* We must kill the current buffer too. */
6118 Fsetcdr (unwind_arg, Fcons (Fcurrent_buffer (), XCDR (unwind_arg)));
6119 }
5856 else 6120 else
5857 { 6121 {
6122 Vlast_coding_system_used = coding->symbol;
5858 TEMP_SET_PT_BOTH (BEG, BEG_BYTE); 6123 TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5859 call1 (coding->post_read_conversion, make_number (Z - BEG)); 6124 call1 (coding->post_read_conversion, make_number (Z - BEG));
6125 coding->symbol = Vlast_coding_system_used;
5860 } 6126 }
5861 inhibit_pre_post_conversion = 0; 6127 inhibit_pre_post_conversion = 0;
5862 Vdeactivate_mark = old_deactivate_mark; 6128 Vdeactivate_mark = old_deactivate_mark;
5863 str = make_buffer_string (BEG, Z, 1); 6129 str = make_buffer_string (BEG, Z, 1);
5864 return unbind_to (count, str); 6130 return unbind_to (count, str);
5865 } 6131 }
6132
6133
6134 /* Run the pre-write-conversion function of CODING on the NCHARS
6135 characters (NBYTES bytes) of text in *STR. *SIZE is the number of
6136 bytes allocated for *STR. As this function is intended to be called
6137 from encode_terminal_code, the pre-write-conversion function is run
6138 by safe_call and thus "Error during redisplay: ..." is logged on error.
6139
6140 Store the resulting text in *STR and set CODING->produced_char and
6141 CODING->produced to the number of characters and bytes
6142 respectively. If the size of *STR is too small, enlarge it by
6143 xrealloc and update *STR and *SIZE. */
6144
6145 void
6146 run_pre_write_conversin_on_c_str (str, size, nchars, nbytes, coding)
6147 unsigned char **str;
6148 int *size, nchars, nbytes;
6149 struct coding_system *coding;
6150 {
6151 struct gcpro gcpro1, gcpro2;
6152 struct buffer *cur = current_buffer;
6153 struct buffer *prev;
6154 Lisp_Object old_deactivate_mark, old_last_coding_system_used;
6155 Lisp_Object args[3];
6156 Lisp_Object buffer_to_kill;
6157
6158 /* It is not crucial to specbind this. */
6159 old_deactivate_mark = Vdeactivate_mark;
6160 old_last_coding_system_used = Vlast_coding_system_used;
6161 GCPRO2 (old_deactivate_mark, old_last_coding_system_used);
6162
6163 /* We must insert the contents of STR as is without
6164 unibyte<->multibyte conversion. For that, we adjust the
6165 multibyteness of the working buffer to that of STR. */
6166 buffer_to_kill = set_conversion_work_buffer (coding->src_multibyte);
6167 insert_1_both (*str, nchars, nbytes, 0, 0, 0);
6168 UNGCPRO;
6169 inhibit_pre_post_conversion = 1;
6170 prev = current_buffer;
6171 args[0] = coding->pre_write_conversion;
6172 args[1] = make_number (BEG);
6173 args[2] = make_number (Z);
6174 safe_call (3, args);
6175 inhibit_pre_post_conversion = 0;
6176 Vdeactivate_mark = old_deactivate_mark;
6177 Vlast_coding_system_used = old_last_coding_system_used;
6178 coding->produced_char = Z - BEG;
6179 coding->produced = Z_BYTE - BEG_BYTE;
6180 if (coding->produced > *size)
6181 {
6182 *size = coding->produced;
6183 *str = xrealloc (*str, *size);
6184 }
6185 if (BEG < GPT && GPT < Z)
6186 move_gap (BEG);
6187 bcopy (BEG_ADDR, *str, coding->produced);
6188 coding->src_multibyte
6189 = ! NILP (current_buffer->enable_multibyte_characters);
6190 if (prev != current_buffer)
6191 Fkill_buffer (Fcurrent_buffer ());
6192 set_buffer_internal (cur);
6193 if (! NILP (buffer_to_kill))
6194 Fkill_buffer (buffer_to_kill);
6195 }
6196
5866 6197
5867 Lisp_Object 6198 Lisp_Object
5868 decode_coding_string (str, coding, nocopy) 6199 decode_coding_string (str, coding, nocopy)
5869 Lisp_Object str; 6200 Lisp_Object str;
5870 struct coding_system *coding; 6201 struct coding_system *coding;
5938 if (from == to_byte) 6269 if (from == to_byte)
5939 require_decoding = 0; 6270 require_decoding = 0;
5940 shrinked_bytes = from + (SBYTES (str) - to_byte); 6271 shrinked_bytes = from + (SBYTES (str) - to_byte);
5941 } 6272 }
5942 6273
5943 if (!require_decoding) 6274 if (!require_decoding
6275 && !(SYMBOLP (coding->post_read_conversion)
6276 && !NILP (Ffboundp (coding->post_read_conversion))))
5944 { 6277 {
5945 coding->consumed = SBYTES (str); 6278 coding->consumed = SBYTES (str);
5946 coding->consumed_char = SCHARS (str); 6279 coding->consumed_char = SCHARS (str);
5947 if (coding->dst_multibyte) 6280 if (coding->dst_multibyte)
5948 { 6281 {
5968 consumed += coding->consumed; 6301 consumed += coding->consumed;
5969 consumed_char += coding->consumed_char; 6302 consumed_char += coding->consumed_char;
5970 produced += coding->produced; 6303 produced += coding->produced;
5971 produced_char += coding->produced_char; 6304 produced_char += coding->produced_char;
5972 if (result == CODING_FINISH_NORMAL 6305 if (result == CODING_FINISH_NORMAL
6306 || result == CODING_FINISH_INTERRUPT
5973 || (result == CODING_FINISH_INSUFFICIENT_SRC 6307 || (result == CODING_FINISH_INSUFFICIENT_SRC
5974 && coding->consumed == 0)) 6308 && coding->consumed == 0))
5975 break; 6309 break;
5976 if (result == CODING_FINISH_INSUFFICIENT_CMP) 6310 if (result == CODING_FINISH_INSUFFICIENT_CMP)
5977 coding_allocate_composition_data (coding, from + produced_char); 6311 coding_allocate_composition_data (coding, from + produced_char);
6037 STRING_COPYIN (newstr, from + produced, 6371 STRING_COPYIN (newstr, from + produced,
6038 SDATA (str) + to_byte, 6372 SDATA (str) + to_byte,
6039 shrinked_bytes - from); 6373 shrinked_bytes - from);
6040 free_conversion_buffer (&buf); 6374 free_conversion_buffer (&buf);
6041 6375
6376 coding->consumed += shrinked_bytes;
6377 coding->consumed_char += shrinked_bytes;
6378 coding->produced += shrinked_bytes;
6379 coding->produced_char += shrinked_bytes;
6380
6042 if (coding->cmp_data && coding->cmp_data->used) 6381 if (coding->cmp_data && coding->cmp_data->used)
6043 coding_restore_composition (coding, newstr); 6382 coding_restore_composition (coding, newstr);
6044 coding_free_composition_data (coding); 6383 coding_free_composition_data (coding);
6045 6384
6046 if (SYMBOLP (coding->post_read_conversion) 6385 if (SYMBOLP (coding->post_read_conversion)
6064 Lisp_Object newstr; 6403 Lisp_Object newstr;
6065 int consumed, consumed_char, produced, produced_char; 6404 int consumed, consumed_char, produced, produced_char;
6066 6405
6067 if (SYMBOLP (coding->pre_write_conversion) 6406 if (SYMBOLP (coding->pre_write_conversion)
6068 && !NILP (Ffboundp (coding->pre_write_conversion))) 6407 && !NILP (Ffboundp (coding->pre_write_conversion)))
6069 str = run_pre_post_conversion_on_str (str, coding, 1); 6408 {
6409 str = run_pre_post_conversion_on_str (str, coding, 1);
6410 /* As STR is just newly generated, we don't have to copy it
6411 anymore. */
6412 nocopy = 1;
6413 }
6070 6414
6071 from = 0; 6415 from = 0;
6072 to = SCHARS (str); 6416 to = SCHARS (str);
6073 to_byte = SBYTES (str); 6417 to_byte = SBYTES (str);
6074 6418
6075 /* Encoding routines determine the multibyteness of the source text 6419 /* Encoding routines determine the multibyteness of the source text
6076 by coding->src_multibyte. */ 6420 by coding->src_multibyte. */
6077 coding->src_multibyte = STRING_MULTIBYTE (str); 6421 coding->src_multibyte = SCHARS (str) < SBYTES (str);
6078 coding->dst_multibyte = 0; 6422 coding->dst_multibyte = 0;
6079 if (! CODING_REQUIRE_ENCODING (coding)) 6423 if (! CODING_REQUIRE_ENCODING (coding))
6080 { 6424 goto no_need_of_encoding;
6081 coding->consumed = SBYTES (str);
6082 coding->consumed_char = SCHARS (str);
6083 if (STRING_MULTIBYTE (str))
6084 {
6085 str = Fstring_as_unibyte (str);
6086 nocopy = 1;
6087 }
6088 coding->produced = SBYTES (str);
6089 coding->produced_char = SCHARS (str);
6090 return (nocopy ? str : Fcopy_sequence (str));
6091 }
6092 6425
6093 if (coding->composing != COMPOSITION_DISABLED) 6426 if (coding->composing != COMPOSITION_DISABLED)
6094 coding_save_composition (coding, from, to, str); 6427 coding_save_composition (coding, from, to, str);
6095 6428
6096 /* Try to skip the leading and trailing ASCIIs. */ 6429 /* Try to skip the leading and trailing ASCIIs. We can't skip them
6097 if (coding->type != coding_type_ccl) 6430 if we must run a CCL program or there are compositions to
6431 encode. */
6432 if (coding->type != coding_type_ccl
6433 && (! coding->cmp_data || coding->cmp_data->used == 0))
6098 { 6434 {
6099 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str), 6435 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6100 1); 6436 1);
6101 if (from == to_byte) 6437 if (from == to_byte)
6102 return (nocopy ? str : Fcopy_sequence (str)); 6438 {
6439 coding_free_composition_data (coding);
6440 goto no_need_of_encoding;
6441 }
6103 shrinked_bytes = from + (SBYTES (str) - to_byte); 6442 shrinked_bytes = from + (SBYTES (str) - to_byte);
6104 } 6443 }
6105 6444
6106 len = encoding_buffer_size (coding, to_byte - from); 6445 len = encoding_buffer_size (coding, to_byte - from);
6107 allocate_conversion_buffer (buf, len); 6446 allocate_conversion_buffer (buf, len);
6115 consumed += coding->consumed; 6454 consumed += coding->consumed;
6116 consumed_char += coding->consumed_char; 6455 consumed_char += coding->consumed_char;
6117 produced += coding->produced; 6456 produced += coding->produced;
6118 produced_char += coding->produced_char; 6457 produced_char += coding->produced_char;
6119 if (result == CODING_FINISH_NORMAL 6458 if (result == CODING_FINISH_NORMAL
6459 || result == CODING_FINISH_INTERRUPT
6120 || (result == CODING_FINISH_INSUFFICIENT_SRC 6460 || (result == CODING_FINISH_INSUFFICIENT_SRC
6121 && coding->consumed == 0)) 6461 && coding->consumed == 0))
6122 break; 6462 break;
6123 /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */ 6463 /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */
6124 extend_conversion_buffer (&buf); 6464 extend_conversion_buffer (&buf);
6140 6480
6141 free_conversion_buffer (&buf); 6481 free_conversion_buffer (&buf);
6142 coding_free_composition_data (coding); 6482 coding_free_composition_data (coding);
6143 6483
6144 return newstr; 6484 return newstr;
6485
6486 no_need_of_encoding:
6487 coding->consumed = SBYTES (str);
6488 coding->consumed_char = SCHARS (str);
6489 if (STRING_MULTIBYTE (str))
6490 {
6491 if (nocopy)
6492 /* We are sure that STR doesn't contain a multibyte
6493 character. */
6494 STRING_SET_UNIBYTE (str);
6495 else
6496 {
6497 str = Fstring_as_unibyte (str);
6498 nocopy = 1;
6499 }
6500 }
6501 coding->produced = SBYTES (str);
6502 coding->produced_char = SCHARS (str);
6503 return (nocopy ? str : Fcopy_sequence (str));
6145 } 6504 }
6146 6505
6147 6506
6148 #ifdef emacs 6507 #ifdef emacs
6149 /*** 8. Emacs Lisp library functions ***/ 6508 /*** 8. Emacs Lisp library functions ***/
6157 { 6516 {
6158 if (NILP (obj)) 6517 if (NILP (obj))
6159 return Qt; 6518 return Qt;
6160 if (!SYMBOLP (obj)) 6519 if (!SYMBOLP (obj))
6161 return Qnil; 6520 return Qnil;
6521 if (! NILP (Fget (obj, Qcoding_system_define_form)))
6522 return Qt;
6162 /* Get coding-spec vector for OBJ. */ 6523 /* Get coding-spec vector for OBJ. */
6163 obj = Fget (obj, Qcoding_system); 6524 obj = Fget (obj, Qcoding_system);
6164 return ((VECTORP (obj) && XVECTOR (obj)->size == 5) 6525 return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6165 ? Qt : Qnil); 6526 ? Qt : Qnil);
6166 } 6527 }
6198 6559
6199 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, 6560 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6200 1, 1, 0, 6561 1, 1, 0,
6201 doc: /* Check validity of CODING-SYSTEM. 6562 doc: /* Check validity of CODING-SYSTEM.
6202 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. 6563 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6203 It is valid if it is a symbol with a non-nil `coding-system' property. 6564 It is valid if it is nil or a symbol with a non-nil `coding-system' property.
6204 The value of property should be a vector of length 5. */) 6565 The value of this property should be a vector of length 5. */)
6205 (coding_system) 6566 (coding_system)
6206 Lisp_Object coding_system; 6567 Lisp_Object coding_system;
6207 { 6568 {
6208 CHECK_SYMBOL (coding_system); 6569 Lisp_Object define_form;
6570
6571 define_form = Fget (coding_system, Qcoding_system_define_form);
6572 if (! NILP (define_form))
6573 {
6574 Fput (coding_system, Qcoding_system_define_form, Qnil);
6575 safe_eval (define_form);
6576 }
6209 if (!NILP (Fcoding_system_p (coding_system))) 6577 if (!NILP (Fcoding_system_p (coding_system)))
6210 return coding_system; 6578 return coding_system;
6211 while (1) 6579 while (1)
6212 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil)); 6580 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6213 } 6581 }
6275 return (highest ? XCAR (val) : val); 6643 return (highest ? XCAR (val) : val);
6276 } 6644 }
6277 6645
6278 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, 6646 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6279 2, 3, 0, 6647 2, 3, 0,
6280 doc: /* Detect coding system of the text in the region between START and END. 6648 doc: /* Detect how the byte sequence in the region is encoded.
6281 Return a list of possible coding systems ordered by priority. 6649 Return a list of possible coding systems used on decoding a byte
6650 sequence containing the bytes in the region between START and END when
6651 the coding system `undecided' is specified. The list is ordered by
6652 priority decided in the current language environment.
6282 6653
6283 If only ASCII characters are found, it returns a list of a single element 6654 If only ASCII characters are found, it returns a list of a single element
6284 `undecided' or its subsidiary coding system according to a detected 6655 `undecided' or its subsidiary coding system according to a detected
6285 end-of-line format. 6656 end-of-line format.
6286 6657
6319 ->enable_multibyte_characters)); 6690 ->enable_multibyte_characters));
6320 } 6691 }
6321 6692
6322 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, 6693 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6323 1, 2, 0, 6694 1, 2, 0,
6324 doc: /* Detect coding system of the text in STRING. 6695 doc: /* Detect how the byte sequence in STRING is encoded.
6325 Return a list of possible coding systems ordered by priority. 6696 Return a list of possible coding systems used on decoding a byte
6697 sequence containing the bytes in STRING when the coding system
6698 `undecided' is specified. The list is ordered by priority decided in
6699 the current language environment.
6326 6700
6327 If only ASCII characters are found, it returns a list of a single element 6701 If only ASCII characters are found, it returns a list of a single element
6328 `undecided' or its subsidiary coding system according to a detected 6702 `undecided' or its subsidiary coding system according to a detected
6329 end-of-line format. 6703 end-of-line format.
6330 6704
6343 SBYTES (string) + 1, 6717 SBYTES (string) + 1,
6344 !NILP (highest), 6718 !NILP (highest),
6345 STRING_MULTIBYTE (string)); 6719 STRING_MULTIBYTE (string));
6346 } 6720 }
6347 6721
6348 /* Return an intersection of lists L1 and L2. */ 6722 /* Subroutine for Ffind_coding_systems_region_internal.
6349
6350 static Lisp_Object
6351 intersection (l1, l2)
6352 Lisp_Object l1, l2;
6353 {
6354 Lisp_Object val = Fcons (Qnil, Qnil), tail;
6355
6356 for (tail = val; CONSP (l1); l1 = XCDR (l1))
6357 {
6358 if (!NILP (Fmemq (XCAR (l1), l2)))
6359 {
6360 XSETCDR (tail, Fcons (XCAR (l1), Qnil));
6361 tail = XCDR (tail);
6362 }
6363 }
6364 return XCDR (val);
6365 }
6366
6367
6368 /* Subroutine for Fsafe_coding_systems_region_internal.
6369 6723
6370 Return a list of coding systems that safely encode the multibyte 6724 Return a list of coding systems that safely encode the multibyte
6371 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of 6725 text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of
6372 possible coding systems. If it is nil, it means that we have not 6726 possible coding systems. If it is nil, it means that we have not
6373 yet found any coding systems. 6727 yet found any coding systems.
6374 6728
6375 WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An 6729 WORK_TABLE is a char-table in which an element is set to t once the
6376 element of WORK_TABLE is set to t once the element is looked up. 6730 element is looked up.
6377 6731
6378 If a non-ASCII single byte char is found, set 6732 If a non-ASCII single byte char is found, set
6379 *single_byte_char_found to 1. */ 6733 *single_byte_char_found to 1. */
6380 6734
6381 static Lisp_Object 6735 static Lisp_Object
6382 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) 6736 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6383 unsigned char *p, *pend; 6737 unsigned char *p, *pend;
6384 Lisp_Object safe_codings, work_table; 6738 Lisp_Object safe_codings, work_table;
6385 int *single_byte_char_found; 6739 int *single_byte_char_found;
6386 { 6740 {
6387 int c, len, idx; 6741 int c, len;
6388 Lisp_Object val; 6742 Lisp_Object val, ch;
6389 6743 Lisp_Object prev, tail;
6744
6745 if (NILP (safe_codings))
6746 goto done_safe_codings;
6390 while (p < pend) 6747 while (p < pend)
6391 { 6748 {
6392 c = STRING_CHAR_AND_LENGTH (p, pend - p, len); 6749 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6393 p += len; 6750 p += len;
6394 if (ASCII_BYTE_P (c)) 6751 if (ASCII_BYTE_P (c))
6395 /* We can ignore ASCII characters here. */ 6752 /* We can ignore ASCII characters here. */
6396 continue; 6753 continue;
6397 if (SINGLE_BYTE_CHAR_P (c)) 6754 if (SINGLE_BYTE_CHAR_P (c))
6398 *single_byte_char_found = 1; 6755 *single_byte_char_found = 1;
6399 if (NILP (safe_codings))
6400 continue;
6401 /* Check the safe coding systems for C. */ 6756 /* Check the safe coding systems for C. */
6402 val = char_table_ref_and_index (work_table, c, &idx); 6757 ch = make_number (c);
6758 val = Faref (work_table, ch);
6403 if (EQ (val, Qt)) 6759 if (EQ (val, Qt))
6404 /* This element was already checked. Ignore it. */ 6760 /* This element was already checked. Ignore it. */
6405 continue; 6761 continue;
6406 /* Remember that we checked this element. */ 6762 /* Remember that we checked this element. */
6407 CHAR_TABLE_SET (work_table, make_number (idx), Qt); 6763 Faset (work_table, ch, Qt);
6408 6764
6409 /* If there are some safe coding systems for C and we have 6765 for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6410 already found the other set of coding systems for the 6766 {
6411 different characters, get the intersection of them. */ 6767 Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6412 if (!EQ (safe_codings, Qt) && !NILP (val)) 6768 int encodable;
6413 val = intersection (safe_codings, val); 6769
6414 safe_codings = val; 6770 elt = XCAR (tail);
6415 } 6771 if (CONSP (XCDR (elt)))
6772 {
6773 /* This entry has this format now:
6774 ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6775 ACCEPT-LATIN-EXTRA ) */
6776 val = XCDR (elt);
6777 encodable = ! NILP (Faref (XCAR (val), ch));
6778 if (! encodable)
6779 {
6780 val = XCDR (val);
6781 translation_table = XCAR (val);
6782 hash_table = XCAR (XCDR (val));
6783 accept_latin_extra = XCAR (XCDR (XCDR (val)));
6784 }
6785 }
6786 else
6787 {
6788 /* This entry has this format now: ( CODING . SAFE-CHARS) */
6789 encodable = ! NILP (Faref (XCDR (elt), ch));
6790 if (! encodable)
6791 {
6792 /* Transform the format to:
6793 ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6794 ACCEPT-LATIN-EXTRA ) */
6795 val = Fget (XCAR (elt), Qcoding_system);
6796 translation_table
6797 = Fplist_get (AREF (val, 3),
6798 Qtranslation_table_for_encode);
6799 if (SYMBOLP (translation_table))
6800 translation_table = Fget (translation_table,
6801 Qtranslation_table);
6802 hash_table
6803 = (CHAR_TABLE_P (translation_table)
6804 ? XCHAR_TABLE (translation_table)->extras[1]
6805 : Qnil);
6806 accept_latin_extra
6807 = ((EQ (AREF (val, 0), make_number (2))
6808 && VECTORP (AREF (val, 4)))
6809 ? AREF (AREF (val, 4), 16)
6810 : Qnil);
6811 XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6812 translation_table, hash_table,
6813 accept_latin_extra));
6814 }
6815 }
6816
6817 if (! encodable
6818 && ((CHAR_TABLE_P (translation_table)
6819 && ! NILP (Faref (translation_table, ch)))
6820 || (HASH_TABLE_P (hash_table)
6821 && ! NILP (Fgethash (ch, hash_table, Qnil)))
6822 || (SINGLE_BYTE_CHAR_P (c)
6823 && ! NILP (accept_latin_extra)
6824 && VECTORP (Vlatin_extra_code_table)
6825 && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6826 encodable = 1;
6827 if (encodable)
6828 prev = tail;
6829 else
6830 {
6831 /* Exclude this coding system from SAFE_CODINGS. */
6832 if (EQ (tail, safe_codings))
6833 {
6834 safe_codings = XCDR (safe_codings);
6835 if (NILP (safe_codings))
6836 goto done_safe_codings;
6837 }
6838 else
6839 XSETCDR (prev, XCDR (tail));
6840 }
6841 }
6842 }
6843
6844 done_safe_codings:
6845 /* If the above loop was terminated before P reaches PEND, it means
6846 SAFE_CODINGS was set to nil. If we have not yet found a
6847 non-ASCII single-byte char, check it now. */
6848 if (! *single_byte_char_found)
6849 while (p < pend)
6850 {
6851 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6852 p += len;
6853 if (! ASCII_BYTE_P (c)
6854 && SINGLE_BYTE_CHAR_P (c))
6855 {
6856 *single_byte_char_found = 1;
6857 break;
6858 }
6859 }
6416 return safe_codings; 6860 return safe_codings;
6417 } 6861 }
6418
6419
6420 /* Return a list of coding systems that safely encode the text between
6421 START and END. If the text contains only ASCII or is unibyte,
6422 return t. */
6423 6862
6424 DEFUN ("find-coding-systems-region-internal", 6863 DEFUN ("find-coding-systems-region-internal",
6425 Ffind_coding_systems_region_internal, 6864 Ffind_coding_systems_region_internal,
6426 Sfind_coding_systems_region_internal, 2, 2, 0, 6865 Sfind_coding_systems_region_internal, 2, 2, 0,
6427 doc: /* Internal use only. */) 6866 doc: /* Internal use only. */)
6477 return Qt; 6916 return Qt;
6478 } 6917 }
6479 } 6918 }
6480 6919
6481 /* The text contains non-ASCII characters. */ 6920 /* The text contains non-ASCII characters. */
6482 work_table = Fcopy_sequence (Vchar_coding_system_table); 6921
6483 safe_codings = find_safe_codings (p1, p1end, Qt, work_table, 6922 work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6923 safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6924
6925 safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6484 &single_byte_char_found); 6926 &single_byte_char_found);
6485 if (p2 < p2end) 6927 if (p2 < p2end)
6486 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, 6928 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6487 &single_byte_char_found); 6929 &single_byte_char_found);
6488
6489 if (EQ (safe_codings, Qt))
6490 ; /* Nothing to be done. */
6491 else if (!single_byte_char_found)
6492 {
6493 /* Append generic coding systems. */
6494 Lisp_Object args[2];
6495 args[0] = safe_codings;
6496 args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
6497 make_number (0));
6498 safe_codings = Fappend (2, args);
6499 }
6500 else
6501 safe_codings = Fcons (Qraw_text,
6502 Fcons (Qemacs_mule,
6503 Fcons (Qno_conversion, safe_codings)));
6504 return safe_codings;
6505 }
6506
6507
6508 static Lisp_Object
6509 find_safe_codings_2 (p, pend, safe_codings, work_table, single_byte_char_found)
6510 unsigned char *p, *pend;
6511 Lisp_Object safe_codings, work_table;
6512 int *single_byte_char_found;
6513 {
6514 int c, len, i;
6515 Lisp_Object val, ch;
6516 Lisp_Object prev, tail;
6517
6518 while (p < pend)
6519 {
6520 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6521 p += len;
6522 if (ASCII_BYTE_P (c))
6523 /* We can ignore ASCII characters here. */
6524 continue;
6525 if (SINGLE_BYTE_CHAR_P (c))
6526 *single_byte_char_found = 1;
6527 if (NILP (safe_codings))
6528 /* Already all coding systems are excluded. */
6529 continue;
6530 /* Check the safe coding systems for C. */
6531 ch = make_number (c);
6532 val = Faref (work_table, ch);
6533 if (EQ (val, Qt))
6534 /* This element was already checked. Ignore it. */
6535 continue;
6536 /* Remember that we checked this element. */
6537 Faset (work_table, ch, Qt);
6538
6539 for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6540 {
6541 val = XCAR (tail);
6542 if (NILP (Faref (XCDR (val), ch)))
6543 {
6544 /* Exclude this coding system from SAFE_CODINGS. */
6545 if (EQ (tail, safe_codings))
6546 safe_codings = XCDR (safe_codings);
6547 else
6548 XSETCDR (prev, XCDR (tail));
6549 }
6550 else
6551 prev = tail;
6552 }
6553 }
6554 return safe_codings;
6555 }
6556
6557 DEFUN ("find-coding-systems-region-internal-2",
6558 Ffind_coding_systems_region_internal_2,
6559 Sfind_coding_systems_region_internal_2, 2, 2, 0,
6560 doc: /* Internal use only. */)
6561 (start, end)
6562 Lisp_Object start, end;
6563 {
6564 Lisp_Object work_table, safe_codings;
6565 int non_ascii_p = 0;
6566 int single_byte_char_found = 0;
6567 const unsigned char *p1, *p1end, *p2, *p2end, *p;
6568
6569 if (STRINGP (start))
6570 {
6571 if (!STRING_MULTIBYTE (start))
6572 return Qt;
6573 p1 = SDATA (start), p1end = p1 + SBYTES (start);
6574 p2 = p2end = p1end;
6575 if (SCHARS (start) != SBYTES (start))
6576 non_ascii_p = 1;
6577 }
6578 else
6579 {
6580 int from, to, stop;
6581
6582 CHECK_NUMBER_COERCE_MARKER (start);
6583 CHECK_NUMBER_COERCE_MARKER (end);
6584 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6585 args_out_of_range (start, end);
6586 if (NILP (current_buffer->enable_multibyte_characters))
6587 return Qt;
6588 from = CHAR_TO_BYTE (XINT (start));
6589 to = CHAR_TO_BYTE (XINT (end));
6590 stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6591 p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6592 if (stop == to)
6593 p2 = p2end = p1end;
6594 else
6595 p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6596 if (XINT (end) - XINT (start) != to - from)
6597 non_ascii_p = 1;
6598 }
6599
6600 if (!non_ascii_p)
6601 {
6602 /* We are sure that the text contains no multibyte character.
6603 Check if it contains eight-bit-graphic. */
6604 p = p1;
6605 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6606 if (p == p1end)
6607 {
6608 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6609 if (p == p2end)
6610 return Qt;
6611 }
6612 }
6613
6614 /* The text contains non-ASCII characters. */
6615
6616 work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6617 safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6618
6619 safe_codings = find_safe_codings_2 (p1, p1end, safe_codings, work_table,
6620 &single_byte_char_found);
6621 if (p2 < p2end)
6622 safe_codings = find_safe_codings_2 (p2, p2end, safe_codings, work_table,
6623 &single_byte_char_found);
6624 if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) 6930 if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6625 safe_codings = Qt; 6931 safe_codings = Qt;
6626 else 6932 else
6627 { 6933 {
6628 /* Turn safe_codings to a list of coding systems... */ 6934 /* Turn safe_codings to a list of coding systems... */
7053 Lisp_Object coding_system; 7359 Lisp_Object coding_system;
7054 { 7360 {
7055 CHECK_SYMBOL (coding_system); 7361 CHECK_SYMBOL (coding_system);
7056 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); 7362 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7057 /* We had better not send unsafe characters to terminal. */ 7363 /* We had better not send unsafe characters to terminal. */
7058 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; 7364 terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7059 /* Character composition should be disabled. */ 7365 /* Character composition should be disabled. */
7060 terminal_coding.composing = COMPOSITION_DISABLED; 7366 terminal_coding.composing = COMPOSITION_DISABLED;
7061 /* Error notification should be suppressed. */ 7367 /* Error notification should be suppressed. */
7062 terminal_coding.suppress_error = 1; 7368 terminal_coding.suppress_error = 1;
7063 terminal_coding.src_multibyte = 1; 7369 terminal_coding.src_multibyte = 1;
7075 setup_coding_system (Fcheck_coding_system (coding_system), 7381 setup_coding_system (Fcheck_coding_system (coding_system),
7076 &safe_terminal_coding); 7382 &safe_terminal_coding);
7077 /* Character composition should be disabled. */ 7383 /* Character composition should be disabled. */
7078 safe_terminal_coding.composing = COMPOSITION_DISABLED; 7384 safe_terminal_coding.composing = COMPOSITION_DISABLED;
7079 /* Error notification should be suppressed. */ 7385 /* Error notification should be suppressed. */
7080 terminal_coding.suppress_error = 1; 7386 safe_terminal_coding.suppress_error = 1;
7081 safe_terminal_coding.src_multibyte = 1; 7387 safe_terminal_coding.src_multibyte = 1;
7082 safe_terminal_coding.dst_multibyte = 0; 7388 safe_terminal_coding.dst_multibyte = 0;
7083 return Qnil; 7389 return Qnil;
7084 } 7390 }
7085 7391
7379 #ifdef emacs 7685 #ifdef emacs
7380 7686
7381 void 7687 void
7382 syms_of_coding () 7688 syms_of_coding ()
7383 { 7689 {
7690 staticpro (&Vcode_conversion_workbuf_name);
7691 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
7692
7384 Qtarget_idx = intern ("target-idx"); 7693 Qtarget_idx = intern ("target-idx");
7385 staticpro (&Qtarget_idx); 7694 staticpro (&Qtarget_idx);
7386 7695
7387 Qcoding_system_history = intern ("coding-system-history"); 7696 Qcoding_system_history = intern ("coding-system-history");
7388 staticpro (&Qcoding_system_history); 7697 staticpro (&Qcoding_system_history);
7467 Vcoding_system_safe_chars = Fcons (Qnil, Qnil); 7776 Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7468 staticpro (&Vcoding_system_safe_chars); 7777 staticpro (&Vcoding_system_safe_chars);
7469 7778
7470 Qtranslation_table = intern ("translation-table"); 7779 Qtranslation_table = intern ("translation-table");
7471 staticpro (&Qtranslation_table); 7780 staticpro (&Qtranslation_table);
7472 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1)); 7781 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7473 7782
7474 Qtranslation_table_id = intern ("translation-table-id"); 7783 Qtranslation_table_id = intern ("translation-table-id");
7475 staticpro (&Qtranslation_table_id); 7784 staticpro (&Qtranslation_table_id);
7476 7785
7477 Qtranslation_table_for_decode = intern ("translation-table-for-decode"); 7786 Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7489 /* Intern this now in case it isn't already done. 7798 /* Intern this now in case it isn't already done.
7490 Setting this variable twice is harmless. 7799 Setting this variable twice is harmless.
7491 But don't staticpro it here--that is done in alloc.c. */ 7800 But don't staticpro it here--that is done in alloc.c. */
7492 Qchar_table_extra_slots = intern ("char-table-extra-slots"); 7801 Qchar_table_extra_slots = intern ("char-table-extra-slots");
7493 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); 7802 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7494 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2)); 7803 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7495 7804
7496 Qvalid_codes = intern ("valid-codes"); 7805 Qvalid_codes = intern ("valid-codes");
7497 staticpro (&Qvalid_codes); 7806 staticpro (&Qvalid_codes);
7498 7807
7499 Qemacs_mule = intern ("emacs-mule"); 7808 Qemacs_mule = intern ("emacs-mule");
7500 staticpro (&Qemacs_mule); 7809 staticpro (&Qemacs_mule);
7501 7810
7502 Qraw_text = intern ("raw-text"); 7811 Qraw_text = intern ("raw-text");
7503 staticpro (&Qraw_text); 7812 staticpro (&Qraw_text);
7813
7814 Qutf_8 = intern ("utf-8");
7815 staticpro (&Qutf_8);
7816
7817 Qcoding_system_define_form = intern ("coding-system-define-form");
7818 staticpro (&Qcoding_system_define_form);
7504 7819
7505 defsubr (&Scoding_system_p); 7820 defsubr (&Scoding_system_p);
7506 defsubr (&Sread_coding_system); 7821 defsubr (&Sread_coding_system);
7507 defsubr (&Sread_non_nil_coding_system); 7822 defsubr (&Sread_non_nil_coding_system);
7508 defsubr (&Scheck_coding_system); 7823 defsubr (&Scheck_coding_system);
7509 defsubr (&Sdetect_coding_region); 7824 defsubr (&Sdetect_coding_region);
7510 defsubr (&Sdetect_coding_string); 7825 defsubr (&Sdetect_coding_string);
7511 defsubr (&Sfind_coding_systems_region_internal); 7826 defsubr (&Sfind_coding_systems_region_internal);
7512 defsubr (&Sfind_coding_systems_region_internal_2);
7513 defsubr (&Sunencodable_char_position); 7827 defsubr (&Sunencodable_char_position);
7514 defsubr (&Sdecode_coding_region); 7828 defsubr (&Sdecode_coding_region);
7515 defsubr (&Sencode_coding_region); 7829 defsubr (&Sencode_coding_region);
7516 defsubr (&Sdecode_coding_string); 7830 defsubr (&Sdecode_coding_string);
7517 defsubr (&Sencode_coding_string); 7831 defsubr (&Sencode_coding_string);
7551 doc: /* List of coding-categories (symbols) ordered by priority. 7865 doc: /* List of coding-categories (symbols) ordered by priority.
7552 7866
7553 On detecting a coding system, Emacs tries code detection algorithms 7867 On detecting a coding system, Emacs tries code detection algorithms
7554 associated with each coding-category one by one in this order. When 7868 associated with each coding-category one by one in this order. When
7555 one algorithm agrees with a byte sequence of source text, the coding 7869 one algorithm agrees with a byte sequence of source text, the coding
7556 system bound to the corresponding coding-category is selected. */); 7870 system bound to the corresponding coding-category is selected.
7871
7872 Don't modify this variable directly, but use `set-coding-priority'. */);
7557 { 7873 {
7558 int i; 7874 int i;
7559 7875
7560 Vcoding_category_list = Qnil; 7876 Vcoding_category_list = Qnil;
7561 for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--) 7877 for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7586 For output to files, if the above procedure does not specify a coding system, 7902 For output to files, if the above procedure does not specify a coding system,
7587 the value of `buffer-file-coding-system' is used. */); 7903 the value of `buffer-file-coding-system' is used. */);
7588 Vcoding_system_for_write = Qnil; 7904 Vcoding_system_for_write = Qnil;
7589 7905
7590 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used, 7906 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7591 doc: /* Coding system used in the latest file or process I/O. */); 7907 doc: /* Coding system used in the latest file or process I/O.
7908 Also set by `encode-coding-region', `decode-coding-region',
7909 `encode-coding-string' and `decode-coding-string'. */);
7592 Vlast_coding_system_used = Qnil; 7910 Vlast_coding_system_used = Qnil;
7593 7911
7594 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion, 7912 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7595 doc: /* *Non-nil means always inhibit code conversion of end-of-line format. 7913 doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7596 See info node `Coding Systems' and info node `Text and Binary' concerning 7914 See info node `Coding Systems' and info node `Text and Binary' concerning
7729 called even if `coding-system-for-write' is non-nil. The command 8047 called even if `coding-system-for-write' is non-nil. The command
7730 `universal-coding-system-argument' binds this variable to t temporarily. */); 8048 `universal-coding-system-argument' binds this variable to t temporarily. */);
7731 coding_system_require_warning = 0; 8049 coding_system_require_warning = 0;
7732 8050
7733 8051
7734 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
7735 doc: /* Char-table containing safe coding systems of each character.
7736 Each element doesn't include such generic coding systems that can
7737 encode any characters. They are in the first extra slot. */);
7738 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
7739
7740 DEFVAR_BOOL ("inhibit-iso-escape-detection", 8052 DEFVAR_BOOL ("inhibit-iso-escape-detection",
7741 &inhibit_iso_escape_detection, 8053 &inhibit_iso_escape_detection,
7742 doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection. 8054 doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7743 8055
7744 By default, on reading a file, Emacs tries to detect how the text is 8056 By default, on reading a file, Emacs tries to detect how the text is
7791 return str; 8103 return str;
7792 } 8104 }
7793 8105
7794 #endif /* emacs */ 8106 #endif /* emacs */
7795 8107
8108 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
8109 (do not change this comment) */