comparison src/coding.h @ 88365:d29ddd7d50f9

Completely re-written.
author Kenichi Handa <handa@m17n.org>
date Fri, 01 Mar 2002 01:17:24 +0000
parents 72a36dc37ced
children 1dd66ce3fc9c
comparison
equal deleted inserted replaced
88364:f6557aebe110 88365:d29ddd7d50f9
1 /* Header for coding system handler. 1 /* Header for coding system handler.
2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. 2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation. 3 Licensed to the Free Software Foundation.
4 Copyright (C) 2001, 2002
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H13PRO009
4 7
5 This file is part of GNU Emacs. 8 This file is part of GNU Emacs.
6 9
7 GNU Emacs is free software; you can redistribute it and/or modify 10 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by 11 it under the terms of the GNU General Public License as published by
20 Boston, MA 02111-1307, USA. */ 23 Boston, MA 02111-1307, USA. */
21 24
22 #ifndef EMACS_CODING_H 25 #ifndef EMACS_CODING_H
23 #define EMACS_CODING_H 26 #define EMACS_CODING_H
24 27
25 #include "ccl.h" 28 /* Index to arguments of Fdefine_coding_system_internal. */
26 29
27 /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/ 30 enum define_coding_system_arg_index
28 31 {
29 /* All code (1-byte) of Emacs' internal format is classified into one 32 coding_arg_name,
30 of the followings. See also `charset.h'. */ 33 coding_arg_mnemonic,
31 enum emacs_code_class_type 34 coding_arg_coding_type,
32 { 35 coding_arg_charset_list,
33 EMACS_control_code, /* Control codes in the range 36 coding_arg_ascii_compatible_p,
34 0x00..0x1F and 0x7F except for the 37 coding_arg_decode_translation_table,
35 following two codes. */ 38 coding_arg_encode_translation_table,
36 EMACS_linefeed_code, /* 0x0A (linefeed) to denote 39 coding_arg_post_read_conversion,
37 end-of-line. */ 40 coding_arg_pre_write_conversion,
38 EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used 41 coding_arg_default_char,
39 in selective display mode. */ 42 coding_arg_plist,
40 EMACS_ascii_code, /* ASCII characters. */ 43 coding_arg_eol_type,
41 EMACS_leading_code_2, /* Base leading code of official 44 coding_arg_max
42 TYPE9N character. */ 45 };
43 EMACS_leading_code_3, /* Base leading code of private TYPE9N 46
44 or official TYPE9Nx9N character. */ 47 enum define_coding_iso2022_arg_index
45 EMACS_leading_code_4, /* Base leading code of private 48 {
46 TYPE9Nx9N character. */ 49 coding_arg_iso2022_initial = coding_arg_max,
47 EMACS_invalid_code /* Invalid code, i.e. a base leading 50 coding_arg_iso2022_reg_usage,
48 code not yet assigned to any 51 coding_arg_iso2022_request,
49 charset, or a code of the range 52 coding_arg_iso2022_flags,
50 0xA0..0xFF. */ 53 coding_arg_iso2022_max
51 }; 54 };
52 55
53 extern enum emacs_code_class_type emacs_code_class[256]; 56 enum define_coding_utf16_arg_index
54 57 {
55 /*** ISO2022 section ***/ 58 coding_arg_utf16_bom = coding_arg_max,
56 59 coding_arg_utf16_endian,
57 /* Macros to define code of control characters for ISO2022's functions. */ 60 coding_arg_utf16_max
58 /* code */ /* function */ 61 };
59 #define ISO_CODE_LF 0x0A /* line-feed */ 62
60 #define ISO_CODE_CR 0x0D /* carriage-return */ 63 enum define_coding_ccl_arg_index
61 #define ISO_CODE_SO 0x0E /* shift-out */ 64 {
62 #define ISO_CODE_SI 0x0F /* shift-in */ 65 coding_arg_ccl_decoder,
63 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ 66 coding_arg_ccl_encoder,
64 #define ISO_CODE_ESC 0x1B /* escape */ 67 coding_arg_ccl_valids,
65 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ 68 coding_arg_ccl_max
66 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ 69 };
67 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ 70
68 71 extern Lisp_Object Vcoding_system_hash_table;
69 /* All code (1-byte) of ISO2022 is classified into one of the 72
70 followings. */ 73 /* Enumeration of coding system type. */
71 enum iso_code_class_type 74
72 { 75 enum coding_system_type
73 ISO_control_0, /* Control codes in the range 76 {
74 0x00..0x1F and 0x7F, except for the 77 coding_type_charset,
75 following 5 codes. */ 78 coding_type_utf_8,
76 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ 79 coding_type_utf_16,
77 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ 80 coding_type_iso_2022,
78 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ 81 coding_type_emacs_mule,
79 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ 82 coding_type_sjis,
80 ISO_escape, /* ISO_CODE_ESC (0x1B) */ 83 coding_type_ccl,
81 ISO_control_1, /* Control codes in the range 84 coding_type_raw_text,
82 0x80..0x9F, except for the 85 coding_type_undecided,
83 following 3 codes. */ 86 coding_type_max
84 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */ 87 };
85 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */ 88
86 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */ 89
87 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */ 90 /* Enumeration of end-of-line format type. */
88 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */ 91
89 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */ 92 enum end_of_line_type
90 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */ 93 {
91 }; 94 eol_lf, /* Line-feed only, same as Emacs' internal
92 95 format. */
93 /** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags' 96 eol_crlf, /* Sequence of carriage-return and
94 element in the structure `coding_system'. This information is used 97 line-feed. */
95 while encoding a text to ISO2022. **/ 98 eol_cr, /* Carriage-return only. */
96 99 eol_any, /* Accept any of above. Produce line-feed
97 /* If set, produce short-form designation sequence (e.g. ESC $ A) 100 only. */
98 instead of long-form sequence (e.g. ESC $ ( A). */ 101 eol_undecided, /* This value is used to denote that the
99 #define CODING_FLAG_ISO_SHORT_FORM 0x0001 102 eol-type is not yet decided. */
100 103 eol_type_max
101 /* If set, reset graphic planes and registers at end-of-line to the 104 };
102 initial state. */ 105
103 #define CODING_FLAG_ISO_RESET_AT_EOL 0x0002 106 /* Enumeration of index to an attribute vector of a coding system. */
104 107
105 /* If set, reset graphic planes and registers before any control 108 enum coding_attr_index
106 characters to the initial state. */ 109 {
107 #define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004 110 coding_attr_base_name,
108 111 coding_attr_docstring,
109 /* If set, encode by 7-bit environment. */ 112 coding_attr_mnemonic,
110 #define CODING_FLAG_ISO_SEVEN_BITS 0x0008 113 coding_attr_type,
111 114 coding_attr_charset_list,
112 /* If set, use locking-shift function. */ 115 coding_attr_ascii_compat,
113 #define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010 116 coding_attr_decode_tbl,
114 117 coding_attr_encode_tbl,
115 /* If set, use single-shift function. Overwrite 118 coding_attr_post_read,
116 CODING_FLAG_ISO_LOCKING_SHIFT. */ 119 coding_attr_pre_write,
117 #define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020 120 coding_attr_default_char,
118 121 coding_attr_plist,
119 /* If set, designate JISX0201-Roman instead of ASCII. */ 122
120 #define CODING_FLAG_ISO_USE_ROMAN 0x0040 123 coding_attr_category,
121 124 coding_attr_safe_charsets,
122 /* If set, designate JISX0208-1978 instead of JISX0208-1983. */ 125
123 #define CODING_FLAG_ISO_USE_OLDJIS 0x0080 126 /* The followings are extra attributes for each type. */
124 127 coding_attr_charset_valids,
125 /* If set, do not produce ISO6429's direction specifying sequence. */ 128
126 #define CODING_FLAG_ISO_NO_DIRECTION 0x0100 129 coding_attr_ccl_decoder,
127 130 coding_attr_ccl_encoder,
128 /* If set, assume designation states are reset at beginning of line on 131 coding_attr_ccl_valids,
129 output. */ 132
130 #define CODING_FLAG_ISO_INIT_AT_BOL 0x0200 133 coding_attr_iso_initial,
131 134 coding_attr_iso_usage,
132 /* If set, designation sequence should be placed at beginning of line 135 coding_attr_iso_request,
133 on output. */ 136 coding_attr_iso_flags,
134 #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400 137
135 138 coding_attr_utf_16_bom,
136 /* If set, do not encode unsafe characters on output. */ 139 coding_attr_utf_16_endian,
137 #define CODING_FLAG_ISO_SAFE 0x0800 140
138 141 coding_attr_emacs_mule_full,
139 /* If set, extra latin codes (128..159) are accepted as a valid code 142
140 on input. */ 143 coding_attr_last_index
141 #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000 144 };
142 145
143 /* If set, use designation escape sequence. */ 146
144 #define CODING_FLAG_ISO_DESIGNATION 0x10000 147 #define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name)
145 148 #define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type)
146 /* A character to be produced on output if encoding of the original 149 #define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list)
147 character is prohibited by CODING_FLAG_ISO_SAFE. */ 150 #define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic)
148 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION 077 /* 077 == `?' */ 151 #define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring)
149 152 #define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat)
150 /* Structure of the field `spec.iso2022' in the structure `coding_system'. */ 153 #define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl)
151 struct iso2022_spec 154 #define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl)
155 #define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read)
156 #define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write)
157 #define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
158 #define CODING_ATTR_DIRECTION(attrs) AREF (attrs, coding_attr_direction)
159 #define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing)
160 #define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist)
161 #define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category)
162 #define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
163
164
165 #define CODING_ID_ATTRS(id) \
166 (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))
167
168 #define CODING_ID_ALIASES(id) \
169 (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))
170
171 #define CODING_ID_EOL_TYPE(id) \
172 (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
173
174 #define CODING_ID_NAME(id) \
175 (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))
176
177 #define CODING_SYSTEM_SPEC(coding_system_symbol) \
178 (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))
179
180 #define CODING_SYSTEM_ID(coding_system_symbol) \
181 hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
182 coding_system_symbol, NULL)
183
184 #define CODING_SYSTEM_P(coding_system_symbol) \
185 (! NILP (CODING_SYSTEM_SPEC (coding_system_symbol)))
186
187 #define CHECK_CODING_SYSTEM(x) \
188 do { \
189 if (!CODING_SYSTEM_P (x)) \
190 x = wrong_type_argument (Qcoding_system_p, (x)); \
191 } while (0)
192
193
194 #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
195 do { \
196 spec = CODING_SYSTEM_SPEC (x); \
197 if (NILP (spec)) \
198 x = wrong_type_argument (Qcoding_system_p, (x)); \
199 } while (0)
200
201
202 #define CHECK_CODING_SYSTEM_GET_ID(x, id) \
203 do \
204 { \
205 id = CODING_SYSTEM_ID (x); \
206 if (id < 0) \
207 x = wrong_type_argument (Qcoding_system_p, (x)); \
208 } while (0)
209
210
211 /*** GENERAL section ***/
212
213 /* Enumeration of result code of code conversion. */
214 enum coding_result_code
215 {
216 CODING_RESULT_SUCCESS,
217 CODING_RESULT_INSUFFICIENT_SRC,
218 CODING_RESULT_INSUFFICIENT_DST,
219 CODING_RESULT_INCONSISTENT_EOL,
220 CODING_RESULT_INSUFFICIENT_CMP,
221 CODING_RESULT_INTERRUPT,
222 CODING_RESULT_INSUFFICIENT_MEM
223 };
224
225
226 /* Macros used for the member `mode' of the struct coding_system. */
227
228 /* If set, recover the original CR or LF of the already decoded text
229 when the decoding routine encounters an inconsistent eol format. */
230 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
231
232 /* If set, the decoding/encoding routines treat the current data as
233 the last block of the whole text to be converted, and do
234 appropriate finishing job. */
235 #define CODING_MODE_LAST_BLOCK 0x02
236
237 /* If set, it means that the current source text is in a buffer which
238 enables selective display. */
239 #define CODING_MODE_SELECTIVE_DISPLAY 0x04
240
241 /* This flag is used by the decoding/encoding routines on the fly. If
242 set, it means that right-to-left text is being processed. */
243 #define CODING_MODE_DIRECTION 0x08
244
245 #define CODING_MODE_FIXED_DESTINATION 0x10
246
247 #define CODING_MODE_SAFE_ENCODING 0x20
248
249 /* Structure of the field `spec.iso_2022' in the structure
250 `coding_system'. */
251 struct iso_2022_spec
152 { 252 {
253 /* */
254 unsigned flags;
255
153 /* The current graphic register invoked to each graphic plane. */ 256 /* The current graphic register invoked to each graphic plane. */
154 int current_invocation[2]; 257 int current_invocation[2];
155 258
156 /* The current charset designated to each graphic register. */ 259 /* The current charset designated to each graphic register. The
260 value -1 means that no charset is designated, -2 means that
261 there was an invalid designation previously. */
157 int current_designation[4]; 262 int current_designation[4];
158
159 /* A charset initially designated to each graphic register. */
160 int initial_designation[4];
161
162 /* If not -1, it is a graphic register specified in an invalid
163 designation sequence. */
164 int last_invalid_designation_register;
165
166 /* A graphic register to which each charset should be designated. */
167 unsigned char requested_designation[MAX_CHARSET + 1];
168
169 /* A revision number to be specified for each charset on encoding.
170 The value 255 means no revision number for the corresponding
171 charset. */
172 unsigned char charset_revision_number[MAX_CHARSET + 1];
173 263
174 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked 264 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
175 by single-shift while encoding. */ 265 by single-shift while encoding. */
176 int single_shifting; 266 int single_shifting;
177 267
178 /* Set to 1 temporarily only when processing at beginning of line. */ 268 /* Set to 1 temporarily only when processing at beginning of line. */
179 int bol; 269 int bol;
180 }; 270 };
181 271
182 /* Macros to access each field in the structure `spec.iso2022'. */ 272 struct ccl_spec;
183 #define CODING_SPEC_ISO_INVOCATION(coding, plane) \ 273
184 (coding)->spec.iso2022.current_invocation[plane] 274 enum utf_16_bom_type
185 #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \ 275 {
186 (coding)->spec.iso2022.current_designation[reg] 276 utf_16_detect_bom,
187 #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \ 277 utf_16_without_bom,
188 (coding)->spec.iso2022.initial_designation[reg] 278 utf_16_with_bom
189 #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \ 279 };
190 (coding)->spec.iso2022.requested_designation[charset] 280
191 #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \ 281 enum utf_16_endian_type
192 (coding)->spec.iso2022.charset_revision_number[charset] 282 {
193 #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \ 283 utf_16_big_endian,
194 (coding)->spec.iso2022.single_shifting 284 utf_16_little_endian
195 #define CODING_SPEC_ISO_BOL(coding) \ 285 };
196 (coding)->spec.iso2022.bol 286
197 287 struct utf_16_spec
198 /* A value which may appear in
199 coding->spec.iso2022.requested_designation indicating that the
200 corresponding charset does not request any graphic register to be
201 designated. */
202 #define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
203
204 /* Return a charset which is currently designated to the graphic plane
205 PLANE in the coding-system CODING. */
206 #define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
207 ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \
208 ? -1 \
209 : CODING_SPEC_ISO_DESIGNATION (coding, \
210 CODING_SPEC_ISO_INVOCATION (coding, plane)))
211
212 /*** BIG5 section ***/
213
214 /* Macros to denote each type of BIG5 coding system. */
215 #define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of
216 BIG5 developed by Hong Kong
217 University. */
218 #define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants
219 of BIG5 developed by the
220 company ETen in Taiwan. */
221
222 /*** GENERAL section ***/
223
224 /* Types of coding system. */
225 enum coding_type
226 {
227 coding_type_no_conversion, /* A coding system which requires no
228 conversion for reading and writing
229 including end-of-line format. */
230 coding_type_emacs_mule, /* A coding system used in Emacs'
231 buffer and string. Requires no
232 conversion for reading and writing
233 except for end-of-line format. */
234 coding_type_undecided, /* A coding system which requires
235 automatic detection of a real
236 coding system. */
237 coding_type_sjis, /* SJIS coding system for Japanese. */
238 coding_type_iso2022, /* Any coding system of ISO2022
239 variants. */
240 coding_type_big5, /* BIG5 coding system for Chinese. */
241 coding_type_ccl, /* The coding system of which decoder
242 and encoder are written in CCL. */
243 coding_type_raw_text /* A coding system for a text
244 containing random 8-bit code which
245 does not require code conversion
246 except for end-of-line format. */
247 };
248
249 /* Formats of end-of-line. */
250 #define CODING_EOL_LF 0 /* Line-feed only, same as Emacs'
251 internal format. */
252 #define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
253 line-feed. */
254 #define CODING_EOL_CR 2 /* Carriage-return only. */
255 #define CODING_EOL_UNDECIDED 3 /* This value is used to denote the
256 eol-type is not yet decided. */
257 #define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
258 eol-type is not consistent
259 through the file. */
260
261 /* 1 iff composing. */
262 #define COMPOSING_P(coding) ((int) coding->composing > (int) COMPOSITION_NO)
263
264 #define COMPOSITION_DATA_SIZE 4080
265 #define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2)
266
267 /* Data structure to hold information about compositions of text that
268 is being decoded or encoded. ISO 2022 base code conversion routines
269 handle special ESC sequences for composition specification. But,
270 they can't get/put such information directly from/to a buffer in
271 the deepest place. So, they store or retrieve the information
272 through this structure.
273
274 The encoder stores the information in this structure when it meets
275 ESC sequences for composition while encoding codes, then, after all
276 text codes are encoded, puts `composition' properties on the text
277 by referring to the structure.
278
279 The decoder at first stores the information of a text to be
280 decoded, then, while decoding codes, generates ESC sequences for
281 composition at proper places by referring to the structure. */
282
283 struct composition_data
284 { 288 {
285 /* The character position of the first character to be encoded or 289 enum utf_16_bom_type bom;
286 decoded. START and END (see below) are relative to this 290 enum utf_16_endian_type endian;
287 position. */ 291 int surrogate;
288 int char_offset;
289
290 /* The composition data. These elements are repeated for each
291 composition:
292 LENGTH START END METHOD [ COMPONENT ... ]
293 where,
294 LENGTH is the number of elements for this composition.
295
296 START and END are starting and ending character positions of
297 the composition relative to `char_offset'.
298
299 METHOD is one of `enum composing_status' specifying the way of
300 composition.
301
302 COMPONENT is a character or an encoded composition rule. */
303 int data[COMPOSITION_DATA_SIZE];
304
305 /* The number of elements in `data' currently used. */
306 int used;
307
308 /* Pointers to the previous and next structures. When `data' is
309 filled up, another structure is allocated and linked in `next'.
310 The new structure has backward link to this structure in `prev'.
311 The number of chained structures depends on how many compositions
312 the text being encoded or decoded contains. */
313 struct composition_data *prev, *next;
314 }; 292 };
315
316 /* Macros used for the member `result' of the struct
317 coding_system. */
318 #define CODING_FINISH_NORMAL 0
319 #define CODING_FINISH_INSUFFICIENT_SRC 1
320 #define CODING_FINISH_INSUFFICIENT_DST 2
321 #define CODING_FINISH_INCONSISTENT_EOL 3
322 #define CODING_FINISH_INSUFFICIENT_CMP 4
323 #define CODING_FINISH_INTERRUPT 5
324
325 /* Macros used for the member `mode' of the struct coding_system. */
326
327 /* If set, recover the original CR or LF of the already decoded text
328 when the decoding routine encounters an inconsistent eol format. */
329 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
330
331 /* If set, the decoding/encoding routines treat the current data as
332 the last block of the whole text to be converted, and do
333 appropriate finishing job. */
334 #define CODING_MODE_LAST_BLOCK 0x02
335
336 /* If set, it means that the current source text is in a buffer which
337 enables selective display. */
338 #define CODING_MODE_SELECTIVE_DISPLAY 0x04
339
340 /* This flag is used by the decoding/encoding routines on the fly. If
341 set, it means that right-to-left text is being processed. */
342 #define CODING_MODE_DIRECTION 0x08
343 293
344 struct coding_system 294 struct coding_system
345 { 295 {
346 /* Type of the coding system. */ 296 /* ID number of the coding system. This is an index to
347 enum coding_type type; 297 Vcoding_system_hash_table. This value is set by
348 298 setup_coding_system. At the early stage of building time, this
349 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */ 299 value is -1 in the array coding_categories to indicate that no
350 int eol_type; 300 coding-system of that category is yet defined. */
301 int id;
351 302
352 /* Flag bits of the coding system. The meaning of each bit is common 303 /* Flag bits of the coding system. The meaning of each bit is common
353 to all types of coding systems. */ 304 to all types of coding systems. */
354 unsigned int common_flags; 305 int common_flags;
355
356 /* Flag bits of the coding system. The meaning of each bit depends
357 on the type of the coding system. */
358 unsigned int flags;
359 306
360 /* Mode bits of the coding system. See the comments of the macros 307 /* Mode bits of the coding system. See the comments of the macros
361 CODING_MODE_XXX. */ 308 CODING_MODE_XXX. */
362 unsigned int mode; 309 unsigned int mode;
363 310
364 /* The current status of composition handling. */
365 int composing;
366
367 /* 1 iff the next character is a composition rule. */
368 int composition_rule_follows;
369
370 /* Information of compositions are stored here on decoding and set
371 in advance on encoding. */
372 struct composition_data *cmp_data;
373
374 /* Index to cmp_data->data for the first element for the current
375 composition. */
376 int cmp_data_start;
377
378 /* Index to cmp_data->data for the current element for the current
379 composition. */
380 int cmp_data_index;
381
382 /* Detailed information specific to each type of coding system. */ 311 /* Detailed information specific to each type of coding system. */
383 union spec 312 union
384 { 313 {
385 struct iso2022_spec iso2022; 314 struct iso_2022_spec iso_2022;
386 struct ccl_spec ccl; /* Defined in ccl.h. */ 315 struct ccl_spec *ccl; /* Defined in ccl.h. */
316 struct utf_16_spec utf_16;
317 int emacs_mule_full_support;
387 } spec; 318 } spec;
388 319
389 /* Index number of coding category of the coding system. */ 320 int max_charset_id;
390 int category_idx; 321 char *safe_charsets;
391 322
392 /* The following two members specify how characters 128..159 are 323 /* The following two members specify how binary 8-bit code 128..255
393 represented in source and destination text respectively. 1 means 324 are represented in source and destination text respectively. 1
394 they are represented by 2-byte sequence, 0 means they are 325 means they are represented by 2-byte sequence, 0 means they are
395 represented by 1-byte as is (see the comment in charset.h). */ 326 represented by 1-byte as is (see the comment in character.h). */
396 unsigned src_multibyte : 1; 327 unsigned src_multibyte : 1;
397 unsigned dst_multibyte : 1; 328 unsigned dst_multibyte : 1;
398 329
399 /* How many heading bytes we can skip for decoding. This is set to 330 /* How many heading bytes we can skip for decoding. This is set to
400 -1 in setup_coding_system, and updated by detect_coding. So, 331 -1 in setup_coding_system, and updated by detect_coding. So,
401 when this is equal to the byte length of the text being 332 when this is equal to the byte length of the text being
402 converted, we can skip the actual conversion process. */ 333 converted, we can skip the actual conversion process. */
403 int heading_ascii; 334 int head_ascii;
404 335
405 /* The following members are set by encoding/decoding routine. */ 336 /* The following members are set by encoding/decoding routine. */
406 int produced, produced_char, consumed, consumed_char; 337 EMACS_INT produced, produced_char, consumed, consumed_char;
407 338
408 /* Number of error source data found in a decoding routine. */ 339 /* Number of error source data found in a decoding routine. */
409 int errors; 340 int errors;
410 341
411 /* Finish status of code conversion. It should be one of macros 342 /* Store the positions of error source data. */
412 CODING_FINISH_XXXX. */ 343 EMACS_INT *error_positions;
413 int result; 344
414 345 /* Finish status of code conversion. */
415 /* If nonzero, suppress error notification. */ 346 enum coding_result_code result;
416 int suppress_error;
417 347
418 /* The following members are all Lisp symbols. We don't have to 348 /* The following members are all Lisp symbols. We don't have to
419 protect them from GC because the current garbage collection 349 protect them from GC because the current garbage collection
420 doesn't relocate Lisp symbols. But, when it is changed, we must 350 doesn't relocate Lisp symbols. But, when it is changed, we must
421 find a way to protect them. */ 351 find a way to protect them. */
422 352
423 /* Backward pointer to the Lisp symbol of the coding system. */ 353 EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
424 Lisp_Object symbol; 354 Lisp_Object src_object;
425 355 unsigned char *source;
426 /* Lisp function (symbol) to be called after decoding to do 356
427 additional conversion, or nil. */ 357 EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
428 Lisp_Object post_read_conversion; 358 Lisp_Object dst_object;
429 359 unsigned char *destination;
430 /* Lisp function (symbol) to be called before encoding to do 360
431 additional conversion, or nil. */ 361 int chars_at_source;
432 Lisp_Object pre_write_conversion; 362
433 363 /* If an element is non-negative, it is a character code.
434 /* Character translation tables to look up, or nil. */ 364
435 Lisp_Object translation_table_for_decode; 365 If it is in the range -128..-1, it is a 8-bit character code
436 Lisp_Object translation_table_for_encode; 366 minus 256.
367
368 If it is less than -128, it specifies the start of an annotation
369 chunk. The length of the chunk is -128 minus the value of the
370 element. The following elements are OFFSET, ANNOTATION-TYPE, and
371 a sequence of actual data for the annotation. OFFSET is a
372 character position offset from dst_pos or src_pos,
373 ANNOTATION-TYPE specifies the meaning of the annotation and how to
374 handle the following data. */
375 int *charbuf;
376 int charbuf_size, charbuf_used;
377
378 /* Set to 1 if charbuf contains an annotation. */
379 int annotated;
380
381 unsigned char carryover[64];
382 int carryover_bytes;
383
384 int default_char;
385
386 int (*detector) P_ ((struct coding_system *, int *));
387 void (*decoder) P_ ((struct coding_system *));
388 int (*encoder) P_ ((struct coding_system *));
437 }; 389 };
438 390
439 #define CODING_REQUIRE_FLUSHING_MASK 1 391 /* Meanings of bits in the member `common_flags' of the structure
440 #define CODING_REQUIRE_DECODING_MASK 2 392 coding_system. The lowest 8 bits are reserved for various kinds of
441 #define CODING_REQUIRE_ENCODING_MASK 4 393 annotations (currently two of them are used). */
442 #define CODING_REQUIRE_DETECTION_MASK 8 394 #define CODING_ANNOTATION_MASK 0x00FF
443 395 #define CODING_ANNOTATE_COMPOSITION_MASK 0x0001
444 /* Return 1 if the coding system CODING requires specific code to be 396 #define CODING_ANNOTATE_DIRECTION_MASK 0x0002
397 #define CODING_FOR_UNIBYTE_MASK 0x0100
398 #define CODING_REQUIRE_FLUSHING_MASK 0x0200
399 #define CODING_REQUIRE_DECODING_MASK 0x0400
400 #define CODING_REQUIRE_ENCODING_MASK 0x0800
401 #define CODING_REQUIRE_DETECTION_MASK 0x1000
402 #define CODING_RESET_AT_BOL_MASK 0x2000
403
404 /* Return 1 if the coding context CODING requires annotation
405 handling. */
406 #define CODING_REQUIRE_ANNOTATION(coding) \
407 ((coding)->common_flags & CODING_ANNOTATION_MASK)
408
409 /* Return 1 if the coding context CODING prefers decoding into unibyte. */
410 #define CODING_FOR_UNIBYTE(coding) \
411 ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)
412
413 /* Return 1 if the coding context CODING requires specific code to be
445 attached at the tail of converted text. */ 414 attached at the tail of converted text. */
446 #define CODING_REQUIRE_FLUSHING(coding) \ 415 #define CODING_REQUIRE_FLUSHING(coding) \
447 ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) 416 ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
448 417
449 /* Return 1 if the coding system CODING requires code conversion on 418 /* Return 1 if the coding context CODING requires code conversion on
450 decoding. */ 419 decoding. */
451 #define CODING_REQUIRE_DECODING(coding) \ 420 #define CODING_REQUIRE_DECODING(coding) \
452 ((coding)->dst_multibyte \ 421 ((coding)->dst_multibyte \
453 || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) 422 || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
454 423
455 /* Return 1 if the coding system CODING requires code conversion on 424
425 /* Return 1 if the coding context CODING requires code conversion on
456 encoding. */ 426 encoding. */
457 #define CODING_REQUIRE_ENCODING(coding) \ 427 #define CODING_REQUIRE_ENCODING(coding) \
458 ((coding)->src_multibyte \ 428 ((coding)->src_multibyte \
459 || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) 429 || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK \
460 430 || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)
461 /* Return 1 if the coding system CODING requires some kind of code 431
432
433 /* Return 1 if the coding context CODING requires some kind of code
462 detection. */ 434 detection. */
463 #define CODING_REQUIRE_DETECTION(coding) \ 435 #define CODING_REQUIRE_DETECTION(coding) \
464 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) 436 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
465 437
466 /* Return 1 if the coding system CODING requires code conversion on 438 /* Return 1 if the coding context CODING requires code conversion on
467 decoding or some kind of code detection. */ 439 decoding or some kind of code detection. */
468 #define CODING_MAY_REQUIRE_DECODING(coding) \ 440 #define CODING_MAY_REQUIRE_DECODING(coding) \
469 (CODING_REQUIRE_DECODING (coding) \ 441 (CODING_REQUIRE_DECODING (coding) \
470 || CODING_REQUIRE_DETECTION (coding)) 442 || CODING_REQUIRE_DETECTION (coding))
471 443
472 /* Index for each coding category in `coding_category_table' */
473 #define CODING_CATEGORY_IDX_EMACS_MULE 0
474 #define CODING_CATEGORY_IDX_SJIS 1
475 #define CODING_CATEGORY_IDX_ISO_7 2
476 #define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
477 #define CODING_CATEGORY_IDX_ISO_8_1 4
478 #define CODING_CATEGORY_IDX_ISO_8_2 5
479 #define CODING_CATEGORY_IDX_ISO_7_ELSE 6
480 #define CODING_CATEGORY_IDX_ISO_8_ELSE 7
481 #define CODING_CATEGORY_IDX_CCL 8
482 #define CODING_CATEGORY_IDX_BIG5 9
483 #define CODING_CATEGORY_IDX_UTF_8 10
484 #define CODING_CATEGORY_IDX_UTF_16_BE 11
485 #define CODING_CATEGORY_IDX_UTF_16_LE 12
486 #define CODING_CATEGORY_IDX_RAW_TEXT 13
487 #define CODING_CATEGORY_IDX_BINARY 14
488 #define CODING_CATEGORY_IDX_MAX 15
489
490 /* Definitions of flag bits returned by the function
491 detect_coding_mask (). */
492 #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
493 #define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
494 #define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
495 #define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
496 #define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
497 #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
498 #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
499 #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
500 #define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL)
501 #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
502 #define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8)
503 #define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE)
504 #define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE)
505 #define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT)
506 #define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY)
507
508 /* This value is returned if detect_coding_mask () finds nothing other
509 than ASCII characters. */
510 #define CODING_CATEGORY_MASK_ANY \
511 ( CODING_CATEGORY_MASK_EMACS_MULE \
512 | CODING_CATEGORY_MASK_SJIS \
513 | CODING_CATEGORY_MASK_ISO_7 \
514 | CODING_CATEGORY_MASK_ISO_7_TIGHT \
515 | CODING_CATEGORY_MASK_ISO_8_1 \
516 | CODING_CATEGORY_MASK_ISO_8_2 \
517 | CODING_CATEGORY_MASK_ISO_7_ELSE \
518 | CODING_CATEGORY_MASK_ISO_8_ELSE \
519 | CODING_CATEGORY_MASK_CCL \
520 | CODING_CATEGORY_MASK_BIG5 \
521 | CODING_CATEGORY_MASK_UTF_8 \
522 | CODING_CATEGORY_MASK_UTF_16_BE \
523 | CODING_CATEGORY_MASK_UTF_16_LE)
524
525 #define CODING_CATEGORY_MASK_ISO_7BIT \
526 (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
527
528 #define CODING_CATEGORY_MASK_ISO_8BIT \
529 (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
530
531 #define CODING_CATEGORY_MASK_ISO_SHIFT \
532 (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
533
534 #define CODING_CATEGORY_MASK_ISO \
535 ( CODING_CATEGORY_MASK_ISO_7BIT \
536 | CODING_CATEGORY_MASK_ISO_SHIFT \
537 | CODING_CATEGORY_MASK_ISO_8BIT)
538
539 #define CODING_CATEGORY_MASK_UTF_16_BE_LE \
540 (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)
541
542 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and 444 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
543 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding 445 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
544 system. C1 and C2 are the 1st and 2nd position codes of Emacs' 446 system. C1 and C2 are the 1st and 2nd position codes of Emacs'
545 internal format. */ 447 internal format. */
546 448
547 #define DECODE_SJIS(s1, s2, c1, c2) \ 449 #define SJIS_TO_JIS(code) \
548 do { \ 450 do { \
549 if (s2 >= 0x9F) \ 451 int s1, s2, j1, j2; \
550 c1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \ 452 \
551 c2 = s2 - 0x7E; \ 453 s1 = (code) >> 8, s2 = (code) & 0xFF; \
552 else \ 454 \
553 c1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \ 455 if (s2 >= 0x9F) \
554 c2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F); \ 456 (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
457 j2 = s2 - 0x7E); \
458 else \
459 (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
460 j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \
461 (code) = (j1 << 8) | j2; \
555 } while (0) 462 } while (0)
556 463
557 #define ENCODE_SJIS(c1, c2, s1, s2) \ 464
465 #define JIS_TO_SJIS(code) \
558 do { \ 466 do { \
559 if (c1 & 1) \ 467 int s1, s2, j1, j2; \
560 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1), \ 468 \
561 s2 = c2 + ((c2 >= 0x60) ? 0x20 : 0x1F); \ 469 j1 = (code) >> 8, j2 = (code) & 0xFF; \
470 if (j1 & 1) \
471 (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \
472 s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \
562 else \ 473 else \
563 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0), \ 474 (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \
564 s2 = c2 + 0x7E; \ 475 s2 = j2 + 0x7E); \
476 (code) = (j1 << 8) | j2; \
565 } while (0) 477 } while (0)
478
566 479
567 /* Encode the file name NAME using the specified coding system 480 /* Encode the file name NAME using the specified coding system
568 for file names, if any. */ 481 for file names, if any. */
569 #define ENCODE_FILE(name) \ 482 #define ENCODE_FILE(name) \
570 (! NILP (Vfile_name_coding_system) \ 483 (! NILP (Vfile_name_coding_system) \
573 : (! NILP (Vdefault_file_name_coding_system) \ 486 : (! NILP (Vdefault_file_name_coding_system) \
574 && XFASTINT (Vdefault_file_name_coding_system) != 0 \ 487 && XFASTINT (Vdefault_file_name_coding_system) != 0 \
575 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ 488 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
576 : name)) 489 : name))
577 490
491
578 /* Decode the file name NAME using the specified coding system 492 /* Decode the file name NAME using the specified coding system
579 for file names, if any. */ 493 for file names, if any. */
580 #define DECODE_FILE(name) \ 494 #define DECODE_FILE(name) \
581 (! NILP (Vfile_name_coding_system) \ 495 (! NILP (Vfile_name_coding_system) \
582 && XFASTINT (Vfile_name_coding_system) != 0 \ 496 && XFASTINT (Vfile_name_coding_system) != 0 \
584 : (! NILP (Vdefault_file_name_coding_system) \ 498 : (! NILP (Vdefault_file_name_coding_system) \
585 && XFASTINT (Vdefault_file_name_coding_system) != 0 \ 499 && XFASTINT (Vdefault_file_name_coding_system) != 0 \
586 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ 500 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
587 : name)) 501 : name))
588 502
503
589 #ifdef WINDOWSNT 504 #ifdef WINDOWSNT
590 /* Encode the string STR using the specified coding system 505 /* Encode the string STR using the specified coding system
591 for w32 system functions, if any. */ 506 for w32 system functions, if any. */
592 #define ENCODE_SYSTEM(str) \ 507 #define ENCODE_SYSTEM(str) \
593 (! NILP (Vlocale_coding_system) \ 508 (! NILP (Vlocale_coding_system) \
594 && XFASTINT (Vlocale_coding_system) != 0 \ 509 && XFASTINT (Vlocale_coding_system) != 0 \
595 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ 510 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
596 : str) 511 : str)
597 512
598 /* Decode the string STR using the specified coding system 513 /* Decode the string STR using the specified coding system
599 for w32 system functions, if any. */ 514 for w32 system functions, if any. */
600 #define DECODE_SYSTEM(name) \ 515 #define DECODE_SYSTEM(name) \
601 (! NILP (Vlocale_coding_system) \ 516 (! NILP (Vlocale_coding_system) \
602 && XFASTINT (Vlocale_coding_system) != 0 \ 517 && XFASTINT (Vlocale_coding_system) != 0 \
603 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ 518 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
604 : str) 519 : str)
605 520
606 #else /* WINDOWSNT */ 521 #else /* WINDOWSNT */
607 522
608 #define ENCODE_SYSTEM(str) string_make_unibyte(str) 523 #define ENCODE_SYSTEM(str) string_make_unibyte(str)
609 #define DECODE_SYSTEM(name) name 524 #define DECODE_SYSTEM(name) name
610 525
611 #endif /* !WINDOWSNT */ 526 #endif /* !WINDOWSNT */
612 527
613 /* Extern declarations. */ 528 /* Extern declarations. */
614 extern int decode_coding P_ ((struct coding_system *, unsigned char *, 529 extern Lisp_Object make_conversion_work_buffer P_ ((int));
615 unsigned char *, int, int)); 530 extern Lisp_Object code_conversion_restore P_ ((Lisp_Object));
616 extern int encode_coding P_ ((struct coding_system *, unsigned char *,
617 unsigned char *, int, int));
618 extern void coding_save_composition P_ ((struct coding_system *, int, int,
619 Lisp_Object));
620 extern void coding_free_composition_data P_ ((struct coding_system *));
621 extern void coding_adjust_composition_offset P_ ((struct coding_system *,
622 int));
623 extern void coding_allocate_composition_data P_ ((struct coding_system *,
624 int));
625 extern void coding_restore_composition P_ ((struct coding_system *,
626 Lisp_Object));
627 extern int code_convert_region P_ ((int, int, int, int, struct coding_system *,
628 int, int));
629 extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object,
630 struct coding_system *,
631 int));
632 extern int decoding_buffer_size P_ ((struct coding_system *, int)); 531 extern int decoding_buffer_size P_ ((struct coding_system *, int));
633 extern int encoding_buffer_size P_ ((struct coding_system *, int)); 532 extern int encoding_buffer_size P_ ((struct coding_system *, int));
634 extern void detect_coding P_ ((struct coding_system *, unsigned char *, int)); 533 extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *));
635 extern void detect_eol P_ ((struct coding_system *, unsigned char *, int)); 534 extern void detect_coding P_ ((struct coding_system *));
636 extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *)); 535 extern Lisp_Object code_convert_region P_ ((EMACS_INT, EMACS_INT,
637 extern Lisp_Object code_convert_string P_ ((Lisp_Object, 536 Lisp_Object, Lisp_Object,
638 struct coding_system *, int, int)); 537 int, int));
639 extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object, 538 extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object,
640 Lisp_Object, int)); 539 Lisp_Object, int, int, int));
641 extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object, 540 extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
642 int)); 541 int));
643 extern void setup_raw_text_coding_system P_ ((struct coding_system *)); 542 extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object));
644 extern Lisp_Object encode_coding_string P_ ((Lisp_Object, 543 extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object));
645 struct coding_system *, int)); 544
646 extern Lisp_Object decode_coding_string P_ ((Lisp_Object, 545 extern int decode_coding_gap P_ ((struct coding_system *,
647 struct coding_system *, int)); 546 EMACS_INT, EMACS_INT));
547 extern int encode_coding_gap P_ ((struct coding_system *,
548 EMACS_INT, EMACS_INT));
549 extern void decode_coding_object P_ ((struct coding_system *,
550 Lisp_Object, EMACS_INT, EMACS_INT,
551 EMACS_INT, EMACS_INT, Lisp_Object));
552 extern void encode_coding_object P_ ((struct coding_system *,
553 Lisp_Object, EMACS_INT, EMACS_INT,
554 EMACS_INT, EMACS_INT, Lisp_Object));
555
556 #define decode_coding_region(coding, from, to) \
557 decode_coding_object (coding, Fcurrent_buffer (), \
558 from, CHAR_TO_BYTE (from), \
559 to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
560
561
562 #define encode_coding_region(coding, from, to) \
563 encode_coding_object (coding, Fcurrent_buffer (), \
564 from, CHAR_TO_BYTE (from), \
565 to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
566
567
568 #define decode_coding_string(coding, string, nocopy) \
569 decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \
570 STRING_BYTES (XSTRING (string)), Qt)
571
572 #define encode_coding_string(coding, string, nocopy) \
573 (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \
574 STRING_BYTES (XSTRING (string)), Qt), \
575 (coding)->dst_object)
576
577
578 #define decode_coding_c_string(coding, src, bytes, dst_object) \
579 do { \
580 (coding)->source = (src); \
581 (coding)->src_chars = (coding)->src_bytes = (bytes); \
582 decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
583 (dst_object)); \
584 } while (0)
585
586
587 extern Lisp_Object preferred_coding_system P_ (());
588
589
648 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; 590 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
649 extern Lisp_Object Qraw_text, Qemacs_mule; 591 extern Lisp_Object Qcoding_system_p;
592 extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
593 extern Lisp_Object Qiso_2022;
650 extern Lisp_Object Qbuffer_file_coding_system; 594 extern Lisp_Object Qbuffer_file_coding_system;
651 extern Lisp_Object Vcoding_category_list; 595
596 extern Lisp_Object Qunix, Qdos, Qmac;
652 597
653 extern Lisp_Object Qtranslation_table; 598 extern Lisp_Object Qtranslation_table;
654 extern Lisp_Object Qtranslation_table_id; 599 extern Lisp_Object Qtranslation_table_id;
655 600
656 /* Mnemonic strings to indicate each type of end-of-line. */ 601 /* Mnemonic strings to indicate each type of end-of-line. */
691 /* Coding-system of what is sent from terminal keyboard. This 636 /* Coding-system of what is sent from terminal keyboard. This
692 structure contains information of a coding-system specified by the 637 structure contains information of a coding-system specified by the
693 function `set-keyboard-coding-system'. */ 638 function `set-keyboard-coding-system'. */
694 extern struct coding_system keyboard_coding; 639 extern struct coding_system keyboard_coding;
695 640
696 /* Default coding system to be used to write a file. */
697 extern struct coding_system default_buffer_file_coding;
698
699 /* Default coding systems used for process I/O. */ 641 /* Default coding systems used for process I/O. */
700 extern Lisp_Object Vdefault_process_coding_system; 642 extern Lisp_Object Vdefault_process_coding_system;
701 643
702 /* Function to call to force a user to force select a proper coding 644 /* Function to call to force a user to force select a proper coding
703 system. */ 645 system. */
704 extern Lisp_Object Vselect_safe_coding_system_function; 646 extern Lisp_Object Vselect_safe_coding_system_function;
705 647
706 /* Coding system for file names, or nil if none. */ 648 /* Coding system for file names, or nil if none. */
707 extern Lisp_Object Vfile_name_coding_system; 649 extern Lisp_Object Vfile_name_coding_system;
713 #endif 655 #endif
714 656
715 /* Error signaled when there's a problem with detecting coding system */ 657 /* Error signaled when there's a problem with detecting coding system */
716 extern Lisp_Object Qcoding_system_error; 658 extern Lisp_Object Qcoding_system_error;
717 659
660 extern char emacs_mule_bytes[256];
661 extern int emacs_mule_string_char P_ ((unsigned char *));
662
718 #endif /* EMACS_CODING_H */ 663 #endif /* EMACS_CODING_H */