Mercurial > emacs
comparison src/coding.h @ 89483:2f877ed80fa6
*** empty log message ***
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 08 Sep 2003 12:53:41 +0000 |
| parents | 375f2633d815 e8f4dff2bfc1 |
| children | 4df223da38ce |
comparison
equal
deleted
inserted
replaced
| 88123:375f2633d815 | 89483:2f877ed80fa6 |
|---|---|
| 1 /* Header for coding system handler. | 1 /* Header for coding system handler. |
| 2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. | 2 Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. |
| 3 Licensed to the Free Software Foundation. | 3 Licensed to the Free Software Foundation. |
| 4 Copyright (C) 2003 | |
| 5 National Institute of Advanced Industrial Science and Technology (AIST) | |
| 6 Registration Number H13PRO009 | |
| 4 | 7 |
| 5 This file is part of GNU Emacs. | 8 This file is part of GNU Emacs. |
| 6 | 9 |
| 7 GNU Emacs is free software; you can redistribute it and/or modify | 10 GNU Emacs is free software; you can redistribute it and/or modify |
| 8 it under the terms of the GNU General Public License as published by | 11 it under the terms of the GNU General Public License as published by |
| 20 Boston, MA 02111-1307, USA. */ | 23 Boston, MA 02111-1307, USA. */ |
| 21 | 24 |
| 22 #ifndef EMACS_CODING_H | 25 #ifndef EMACS_CODING_H |
| 23 #define EMACS_CODING_H | 26 #define EMACS_CODING_H |
| 24 | 27 |
| 25 #include "ccl.h" | 28 /* Index to arguments of Fdefine_coding_system_internal. */ |
| 26 | 29 |
| 27 /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/ | 30 enum define_coding_system_arg_index |
| 28 | 31 { |
| 29 /* All code (1-byte) of Emacs' internal format is classified into one | 32 coding_arg_name, |
| 30 of the followings. See also `charset.h'. */ | 33 coding_arg_mnemonic, |
| 31 enum emacs_code_class_type | 34 coding_arg_coding_type, |
| 32 { | 35 coding_arg_charset_list, |
| 33 EMACS_control_code, /* Control codes in the range | 36 coding_arg_ascii_compatible_p, |
| 34 0x00..0x1F and 0x7F except for the | 37 coding_arg_decode_translation_table, |
| 35 following two codes. */ | 38 coding_arg_encode_translation_table, |
| 36 EMACS_linefeed_code, /* 0x0A (linefeed) to denote | 39 coding_arg_post_read_conversion, |
| 37 end-of-line. */ | 40 coding_arg_pre_write_conversion, |
| 38 EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used | 41 coding_arg_default_char, |
| 39 in selective display mode. */ | 42 coding_arg_for_unibyte, |
| 40 EMACS_ascii_code, /* ASCII characters. */ | 43 coding_arg_plist, |
| 41 EMACS_leading_code_2, /* Base leading code of official | 44 coding_arg_eol_type, |
| 42 TYPE9N character. */ | 45 coding_arg_max |
| 43 EMACS_leading_code_3, /* Base leading code of private TYPE9N | 46 }; |
| 44 or official TYPE9Nx9N character. */ | 47 |
| 45 EMACS_leading_code_4, /* Base leading code of private | 48 enum define_coding_iso2022_arg_index |
| 46 TYPE9Nx9N character. */ | 49 { |
| 47 EMACS_invalid_code /* Invalid code, i.e. a base leading | 50 coding_arg_iso2022_initial = coding_arg_max, |
| 48 code not yet assigned to any | 51 coding_arg_iso2022_reg_usage, |
| 49 charset, or a code of the range | 52 coding_arg_iso2022_request, |
| 50 0xA0..0xFF. */ | 53 coding_arg_iso2022_flags, |
| 51 }; | 54 coding_arg_iso2022_max |
| 52 | 55 }; |
| 53 extern enum emacs_code_class_type emacs_code_class[256]; | 56 |
| 54 | 57 enum define_coding_utf16_arg_index |
| 55 /*** ISO2022 section ***/ | 58 { |
| 56 | 59 coding_arg_utf16_bom = coding_arg_max, |
| 57 /* Macros to define code of control characters for ISO2022's functions. */ | 60 coding_arg_utf16_endian, |
| 58 /* code */ /* function */ | 61 coding_arg_utf16_max |
| 59 #define ISO_CODE_LF 0x0A /* line-feed */ | 62 }; |
| 60 #define ISO_CODE_CR 0x0D /* carriage-return */ | 63 |
| 61 #define ISO_CODE_SO 0x0E /* shift-out */ | 64 enum define_coding_ccl_arg_index |
| 62 #define ISO_CODE_SI 0x0F /* shift-in */ | 65 { |
| 63 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ | 66 coding_arg_ccl_decoder = coding_arg_max, |
| 64 #define ISO_CODE_ESC 0x1B /* escape */ | 67 coding_arg_ccl_encoder, |
| 65 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ | 68 coding_arg_ccl_valids, |
| 66 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ | 69 coding_arg_ccl_max |
| 67 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ | 70 }; |
| 68 | 71 |
| 69 /* All code (1-byte) of ISO2022 is classified into one of the | 72 extern Lisp_Object Vcoding_system_hash_table; |
| 70 followings. */ | 73 |
| 71 enum iso_code_class_type | 74 /* Enumeration of coding system type. */ |
| 72 { | 75 |
| 73 ISO_control_0, /* Control codes in the range | 76 enum coding_system_type |
| 74 0x00..0x1F and 0x7F, except for the | 77 { |
| 75 following 5 codes. */ | 78 coding_type_charset, |
| 76 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ | 79 coding_type_utf_8, |
| 77 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ | 80 coding_type_utf_16, |
| 78 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ | 81 coding_type_iso_2022, |
| 79 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ | 82 coding_type_emacs_mule, |
| 80 ISO_escape, /* ISO_CODE_ESC (0x1B) */ | 83 coding_type_sjis, |
| 81 ISO_control_1, /* Control codes in the range | 84 coding_type_ccl, |
| 82 0x80..0x9F, except for the | 85 coding_type_raw_text, |
| 83 following 3 codes. */ | 86 coding_type_undecided, |
| 84 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */ | 87 coding_type_max |
| 85 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */ | 88 }; |
| 86 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */ | 89 |
| 87 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */ | 90 |
| 88 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */ | 91 /* Enumeration of end-of-line format type. */ |
| 89 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */ | 92 |
| 90 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */ | 93 enum end_of_line_type |
| 91 }; | 94 { |
| 92 | 95 eol_lf, /* Line-feed only, same as Emacs' internal |
| 93 /** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags' | 96 format. */ |
| 94 element in the structure `coding_system'. This information is used | 97 eol_crlf, /* Sequence of carriage-return and |
| 95 while encoding a text to ISO2022. **/ | 98 line-feed. */ |
| 96 | 99 eol_cr, /* Carriage-return only. */ |
| 97 /* If set, produce short-form designation sequence (e.g. ESC $ A) | 100 eol_any, /* Accept any of above. Produce line-feed |
| 98 instead of long-form sequence (e.g. ESC $ ( A). */ | 101 only. */ |
| 99 #define CODING_FLAG_ISO_SHORT_FORM 0x0001 | 102 eol_undecided, /* This value is used to denote that the |
| 100 | 103 eol-type is not yet decided. */ |
| 101 /* If set, reset graphic planes and registers at end-of-line to the | 104 eol_type_max |
| 102 initial state. */ | 105 }; |
| 103 #define CODING_FLAG_ISO_RESET_AT_EOL 0x0002 | 106 |
| 104 | 107 /* Enumeration of index to an attribute vector of a coding system. */ |
| 105 /* If set, reset graphic planes and registers before any control | 108 |
| 106 characters to the initial state. */ | 109 enum coding_attr_index |
| 107 #define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004 | 110 { |
| 108 | 111 coding_attr_base_name, |
| 109 /* If set, encode by 7-bit environment. */ | 112 coding_attr_docstring, |
| 110 #define CODING_FLAG_ISO_SEVEN_BITS 0x0008 | 113 coding_attr_mnemonic, |
| 111 | 114 coding_attr_type, |
| 112 /* If set, use locking-shift function. */ | 115 coding_attr_charset_list, |
| 113 #define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010 | 116 coding_attr_ascii_compat, |
| 114 | 117 coding_attr_decode_tbl, |
| 115 /* If set, use single-shift function. Overwrite | 118 coding_attr_encode_tbl, |
| 116 CODING_FLAG_ISO_LOCKING_SHIFT. */ | 119 coding_attr_post_read, |
| 117 #define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020 | 120 coding_attr_pre_write, |
| 118 | 121 coding_attr_default_char, |
| 119 /* If set, designate JISX0201-Roman instead of ASCII. */ | 122 coding_attr_for_unibyte, |
| 120 #define CODING_FLAG_ISO_USE_ROMAN 0x0040 | 123 coding_attr_plist, |
| 121 | 124 |
| 122 /* If set, designate JISX0208-1978 instead of JISX0208-1983. */ | 125 coding_attr_category, |
| 123 #define CODING_FLAG_ISO_USE_OLDJIS 0x0080 | 126 coding_attr_safe_charsets, |
| 124 | 127 |
| 125 /* If set, do not produce ISO6429's direction specifying sequence. */ | 128 /* The followings are extra attributes for each type. */ |
| 126 #define CODING_FLAG_ISO_NO_DIRECTION 0x0100 | 129 coding_attr_charset_valids, |
| 127 | 130 |
| 128 /* If set, assume designation states are reset at beginning of line on | 131 coding_attr_ccl_decoder, |
| 129 output. */ | 132 coding_attr_ccl_encoder, |
| 130 #define CODING_FLAG_ISO_INIT_AT_BOL 0x0200 | 133 coding_attr_ccl_valids, |
| 131 | 134 |
| 132 /* If set, designation sequence should be placed at beginning of line | 135 coding_attr_iso_initial, |
| 133 on output. */ | 136 coding_attr_iso_usage, |
| 134 #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400 | 137 coding_attr_iso_request, |
| 135 | 138 coding_attr_iso_flags, |
| 136 /* If set, do not encode unsafe characters on output. */ | 139 |
| 137 #define CODING_FLAG_ISO_SAFE 0x0800 | 140 coding_attr_utf_16_bom, |
| 138 | 141 coding_attr_utf_16_endian, |
| 139 /* If set, extra latin codes (128..159) are accepted as a valid code | 142 |
| 140 on input. */ | 143 coding_attr_emacs_mule_full, |
| 141 #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000 | 144 |
| 142 | 145 coding_attr_last_index |
| 143 /* If set, use designation escape sequence. */ | 146 }; |
| 144 #define CODING_FLAG_ISO_DESIGNATION 0x10000 | 147 |
| 145 | 148 |
| 146 /* A character to be produced on output if encoding of the original | 149 #define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name) |
| 147 character is inhibited by CODING_MODE_INHIBIT_UNENCODABLE_CHAR. | 150 #define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type) |
| 148 It must be an ASCII character. */ | 151 #define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list) |
| 149 #define CODING_REPLACEMENT_CHARACTER '?' | 152 #define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic) |
| 150 | 153 #define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring) |
| 151 /* Structure of the field `spec.iso2022' in the structure `coding_system'. */ | 154 #define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat) |
| 152 struct iso2022_spec | 155 #define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl) |
| 156 #define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl) | |
| 157 #define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read) | |
| 158 #define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write) | |
| 159 #define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char) | |
| 160 #define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte) | |
| 161 #define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing) | |
| 162 #define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist) | |
| 163 #define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category) | |
| 164 #define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets) | |
| 165 | |
| 166 | |
| 167 #define CODING_ID_ATTRS(id) \ | |
| 168 (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0)) | |
| 169 | |
| 170 #define CODING_ID_ALIASES(id) \ | |
| 171 (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1)) | |
| 172 | |
| 173 #define CODING_ID_EOL_TYPE(id) \ | |
| 174 (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2)) | |
| 175 | |
| 176 #define CODING_ID_NAME(id) \ | |
| 177 (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id)) | |
| 178 | |
| 179 #define CODING_SYSTEM_SPEC(coding_system_symbol) \ | |
| 180 (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil)) | |
| 181 | |
| 182 #define CODING_SYSTEM_ID(coding_system_symbol) \ | |
| 183 hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \ | |
| 184 coding_system_symbol, NULL) | |
| 185 | |
| 186 #define CODING_SYSTEM_P(coding_system_symbol) \ | |
| 187 (! NILP (CODING_SYSTEM_SPEC (coding_system_symbol))) | |
| 188 | |
| 189 #define CHECK_CODING_SYSTEM(x) \ | |
| 190 do { \ | |
| 191 if (!CODING_SYSTEM_P (x)) \ | |
| 192 wrong_type_argument (Qcoding_system_p, (x)); \ | |
| 193 } while (0) | |
| 194 | |
| 195 | |
| 196 #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \ | |
| 197 do { \ | |
| 198 spec = CODING_SYSTEM_SPEC (x); \ | |
| 199 if (NILP (spec)) \ | |
| 200 x = wrong_type_argument (Qcoding_system_p, (x)); \ | |
| 201 } while (0) | |
| 202 | |
| 203 | |
| 204 #define CHECK_CODING_SYSTEM_GET_ID(x, id) \ | |
| 205 do \ | |
| 206 { \ | |
| 207 id = CODING_SYSTEM_ID (x); \ | |
| 208 if (id < 0) \ | |
| 209 x = wrong_type_argument (Qcoding_system_p, (x)); \ | |
| 210 } while (0) | |
| 211 | |
| 212 | |
| 213 /*** GENERAL section ***/ | |
| 214 | |
| 215 /* Enumeration of result code of code conversion. */ | |
| 216 enum coding_result_code | |
| 217 { | |
| 218 CODING_RESULT_SUCCESS, | |
| 219 CODING_RESULT_INSUFFICIENT_SRC, | |
| 220 CODING_RESULT_INSUFFICIENT_DST, | |
| 221 CODING_RESULT_INCONSISTENT_EOL, | |
| 222 CODING_RESULT_INSUFFICIENT_CMP, | |
| 223 CODING_RESULT_INTERRUPT, | |
| 224 CODING_RESULT_INSUFFICIENT_MEM | |
| 225 }; | |
| 226 | |
| 227 | |
| 228 /* Macros used for the member `mode' of the struct coding_system. */ | |
| 229 | |
| 230 /* If set, recover the original CR or LF of the already decoded text | |
| 231 when the decoding routine encounters an inconsistent eol format. */ | |
| 232 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01 | |
| 233 | |
| 234 /* If set, the decoding/encoding routines treat the current data as | |
| 235 the last block of the whole text to be converted, and do the | |
| 236 appropriate finishing job. */ | |
| 237 #define CODING_MODE_LAST_BLOCK 0x02 | |
| 238 | |
| 239 /* If set, it means that the current source text is in a buffer which | |
| 240 enables selective display. */ | |
| 241 #define CODING_MODE_SELECTIVE_DISPLAY 0x04 | |
| 242 | |
| 243 /* This flag is used by the decoding/encoding routines on the fly. If | |
| 244 set, it means that right-to-left text is being processed. */ | |
| 245 #define CODING_MODE_DIRECTION 0x08 | |
| 246 | |
| 247 #define CODING_MODE_FIXED_DESTINATION 0x10 | |
| 248 | |
| 249 #define CODING_MODE_SAFE_ENCODING 0x20 | |
| 250 | |
| 251 /* Structure of the field `spec.iso_2022' in the structure | |
| 252 `coding_system'. */ | |
| 253 struct iso_2022_spec | |
| 153 { | 254 { |
| 255 /* Bit-wise-or of CODING_ISO_FLAG_XXX. */ | |
| 256 unsigned flags; | |
| 257 | |
| 154 /* The current graphic register invoked to each graphic plane. */ | 258 /* The current graphic register invoked to each graphic plane. */ |
| 155 int current_invocation[2]; | 259 int current_invocation[2]; |
| 156 | 260 |
| 157 /* The current charset designated to each graphic register. */ | 261 /* The current charset designated to each graphic register. The |
| 262 value -1 means that no charset is designated, -2 means that | |
| 263 there was an invalid designation previously. */ | |
| 158 int current_designation[4]; | 264 int current_designation[4]; |
| 159 | |
| 160 /* A charset initially designated to each graphic register. */ | |
| 161 int initial_designation[4]; | |
| 162 | |
| 163 /* If not -1, it is a graphic register specified in an invalid | |
| 164 designation sequence. */ | |
| 165 int last_invalid_designation_register; | |
| 166 | |
| 167 /* A graphic register to which each charset should be designated. */ | |
| 168 unsigned char requested_designation[MAX_CHARSET + 1]; | |
| 169 | |
| 170 /* A revision number to be specified for each charset on encoding. | |
| 171 The value 255 means no revision number for the corresponding | |
| 172 charset. */ | |
| 173 unsigned char charset_revision_number[MAX_CHARSET + 1]; | |
| 174 | 265 |
| 175 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked | 266 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked |
| 176 by single-shift while encoding. */ | 267 by single-shift while encoding. */ |
| 177 int single_shifting; | 268 int single_shifting; |
| 178 | 269 |
| 179 /* Set to 1 temporarily only when processing at beginning of line. */ | 270 /* Set to 1 temporarily only when processing at beginning of line. */ |
| 180 int bol; | 271 int bol; |
| 181 }; | 272 }; |
| 182 | 273 |
| 183 /* Macros to access each field in the structure `spec.iso2022'. */ | 274 struct ccl_spec; |
| 184 #define CODING_SPEC_ISO_INVOCATION(coding, plane) \ | 275 |
| 185 (coding)->spec.iso2022.current_invocation[plane] | 276 enum utf_16_bom_type |
| 186 #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \ | 277 { |
| 187 (coding)->spec.iso2022.current_designation[reg] | 278 utf_16_detect_bom, |
| 188 #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \ | 279 utf_16_without_bom, |
| 189 (coding)->spec.iso2022.initial_designation[reg] | 280 utf_16_with_bom |
| 190 #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \ | 281 }; |
| 191 (coding)->spec.iso2022.requested_designation[charset] | 282 |
| 192 #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \ | 283 enum utf_16_endian_type |
| 193 (coding)->spec.iso2022.charset_revision_number[charset] | 284 { |
| 194 #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \ | 285 utf_16_big_endian, |
| 195 (coding)->spec.iso2022.single_shifting | 286 utf_16_little_endian |
| 196 #define CODING_SPEC_ISO_BOL(coding) \ | 287 }; |
| 197 (coding)->spec.iso2022.bol | 288 |
| 198 | 289 struct utf_16_spec |
| 199 /* A value which may appear in | |
| 200 coding->spec.iso2022.requested_designation indicating that the | |
| 201 corresponding charset does not request any graphic register to be | |
| 202 designated. */ | |
| 203 #define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4 | |
| 204 | |
| 205 /* Return a charset which is currently designated to the graphic plane | |
| 206 PLANE in the coding-system CODING. */ | |
| 207 #define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \ | |
| 208 ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \ | |
| 209 ? -1 \ | |
| 210 : CODING_SPEC_ISO_DESIGNATION (coding, \ | |
| 211 CODING_SPEC_ISO_INVOCATION (coding, plane))) | |
| 212 | |
| 213 /*** BIG5 section ***/ | |
| 214 | |
| 215 /* Macros to denote each type of BIG5 coding system. */ | |
| 216 #define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of | |
| 217 BIG5 developed by Hong Kong | |
| 218 University. */ | |
| 219 #define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants | |
| 220 of BIG5 developed by the | |
| 221 company ETen in Taiwan. */ | |
| 222 | |
| 223 /*** GENERAL section ***/ | |
| 224 | |
| 225 /* Types of coding system. */ | |
| 226 enum coding_type | |
| 227 { | |
| 228 coding_type_no_conversion, /* A coding system which requires no | |
| 229 conversion for reading and writing | |
| 230 including end-of-line format. */ | |
| 231 coding_type_emacs_mule, /* A coding system used in Emacs' | |
| 232 buffer and string. Requires no | |
| 233 conversion for reading and writing | |
| 234 except for end-of-line format. */ | |
| 235 coding_type_undecided, /* A coding system which requires | |
| 236 automatic detection of a real | |
| 237 coding system. */ | |
| 238 coding_type_sjis, /* SJIS coding system for Japanese. */ | |
| 239 coding_type_iso2022, /* Any coding system of ISO2022 | |
| 240 variants. */ | |
| 241 coding_type_big5, /* BIG5 coding system for Chinese. */ | |
| 242 coding_type_ccl, /* The coding system of which decoder | |
| 243 and encoder are written in CCL. */ | |
| 244 coding_type_raw_text /* A coding system for a text | |
| 245 containing random 8-bit code which | |
| 246 does not require code conversion | |
| 247 except for end-of-line format. */ | |
| 248 }; | |
| 249 | |
| 250 /* Formats of end-of-line. */ | |
| 251 #define CODING_EOL_LF 0 /* Line-feed only, same as Emacs' | |
| 252 internal format. */ | |
| 253 #define CODING_EOL_CRLF 1 /* Sequence of carriage-return and | |
| 254 line-feed. */ | |
| 255 #define CODING_EOL_CR 2 /* Carriage-return only. */ | |
| 256 #define CODING_EOL_UNDECIDED 3 /* This value is used to denote the | |
| 257 eol-type is not yet decided. */ | |
| 258 #define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the | |
| 259 eol-type is not consistent | |
| 260 through the file. */ | |
| 261 | |
| 262 /* 1 iff composing. */ | |
| 263 #define COMPOSING_P(coding) ((int) coding->composing > (int) COMPOSITION_NO) | |
| 264 | |
| 265 #define COMPOSITION_DATA_SIZE 4080 | |
| 266 #define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2) | |
| 267 | |
| 268 /* Data structure to hold information about compositions of text that | |
| 269 is being decoded or encoded. ISO 2022 base code conversion routines | |
| 270 handle special ESC sequences for composition specification. But, | |
| 271 they can't get/put such information directly from/to a buffer in | |
| 272 the deepest place. So, they store or retrieve the information | |
| 273 through this structure. | |
| 274 | |
| 275 The encoder stores the information in this structure when it meets | |
| 276 ESC sequences for composition while encoding codes, then, after all | |
| 277 text codes are encoded, puts `composition' properties on the text | |
| 278 by referring to the structure. | |
| 279 | |
| 280 The decoder at first stores the information of a text to be | |
| 281 decoded, then, while decoding codes, generates ESC sequences for | |
| 282 composition at proper places by referring to the structure. */ | |
| 283 | |
| 284 struct composition_data | |
| 285 { | 290 { |
| 286 /* The character position of the first character to be encoded or | 291 enum utf_16_bom_type bom; |
| 287 decoded. START and END (see below) are relative to this | 292 enum utf_16_endian_type endian; |
| 288 position. */ | 293 int surrogate; |
| 289 int char_offset; | |
| 290 | |
| 291 /* The composition data. These elements are repeated for each | |
| 292 composition: | |
| 293 LENGTH START END METHOD [ COMPONENT ... ] | |
| 294 where, | |
| 295 LENGTH is the number of elements for this composition. | |
| 296 | |
| 297 START and END are starting and ending character positions of | |
| 298 the composition relative to `char_offset'. | |
| 299 | |
| 300 METHOD is one of `enum composing_status' specifying the way of | |
| 301 composition. | |
| 302 | |
| 303 COMPONENT is a character or an encoded composition rule. */ | |
| 304 int data[COMPOSITION_DATA_SIZE]; | |
| 305 | |
| 306 /* The number of elements in `data' currently used. */ | |
| 307 int used; | |
| 308 | |
| 309 /* Pointers to the previous and next structures. When `data' is | |
| 310 filled up, another structure is allocated and linked in `next'. | |
| 311 The new structure has backward link to this structure in `prev'. | |
| 312 The number of chained structures depends on how many compositions | |
| 313 the text being encoded or decoded contains. */ | |
| 314 struct composition_data *prev, *next; | |
| 315 }; | 294 }; |
| 316 | 295 |
| 317 /* Macros used for the member `result' of the struct | 296 struct coding_detection_info |
| 318 coding_system. */ | 297 { |
| 319 #define CODING_FINISH_NORMAL 0 | 298 /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs. */ |
| 320 #define CODING_FINISH_INSUFFICIENT_SRC 1 | 299 /* Which categories are already checked. */ |
| 321 #define CODING_FINISH_INSUFFICIENT_DST 2 | 300 int checked; |
| 322 #define CODING_FINISH_INCONSISTENT_EOL 3 | 301 /* Which categories are strongly found. */ |
| 323 #define CODING_FINISH_INSUFFICIENT_CMP 4 | 302 int found; |
| 324 #define CODING_FINISH_INTERRUPT 5 | 303 /* Which categories are rejected. */ |
| 325 | 304 int rejected; |
| 326 /* Macros used for the member `mode' of the struct coding_system. */ | 305 }; |
| 327 | 306 |
| 328 /* If set, recover the original CR or LF of the already decoded text | |
| 329 when the decoding routine encounters an inconsistent eol format. */ | |
| 330 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01 | |
| 331 | |
| 332 /* If set, the decoding/encoding routines treat the current data as | |
| 333 the last block of the whole text to be converted, and do | |
| 334 appropriate finishing job. */ | |
| 335 #define CODING_MODE_LAST_BLOCK 0x02 | |
| 336 | |
| 337 /* If set, it means that the current source text is in a buffer which | |
| 338 enables selective display. */ | |
| 339 #define CODING_MODE_SELECTIVE_DISPLAY 0x04 | |
| 340 | |
| 341 /* If set, replace unencodable characters by `?' on encoding. */ | |
| 342 #define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08 | |
| 343 | |
| 344 /* This flag is used by the decoding/encoding routines on the fly. If | |
| 345 set, it means that right-to-left text is being processed. */ | |
| 346 #define CODING_MODE_DIRECTION 0x10 | |
| 347 | 307 |
| 348 struct coding_system | 308 struct coding_system |
| 349 { | 309 { |
| 350 /* Type of the coding system. */ | 310 /* ID number of the coding system. This is an index to |
| 351 enum coding_type type; | 311 Vcoding_system_hash_table. This value is set by |
| 352 | 312 setup_coding_system. At the early stage of building time, this |
| 353 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */ | 313 value is -1 in the array coding_categories to indicate that no |
| 354 int eol_type; | 314 coding-system of that category is yet defined. */ |
| 315 int id; | |
| 355 | 316 |
| 356 /* Flag bits of the coding system. The meaning of each bit is common | 317 /* Flag bits of the coding system. The meaning of each bit is common |
| 357 to all types of coding systems. */ | 318 to all types of coding systems. */ |
| 358 unsigned int common_flags; | 319 int common_flags; |
| 359 | |
| 360 /* Flag bits of the coding system. The meaning of each bit depends | |
| 361 on the type of the coding system. */ | |
| 362 unsigned int flags; | |
| 363 | 320 |
| 364 /* Mode bits of the coding system. See the comments of the macros | 321 /* Mode bits of the coding system. See the comments of the macros |
| 365 CODING_MODE_XXX. */ | 322 CODING_MODE_XXX. */ |
| 366 unsigned int mode; | 323 unsigned int mode; |
| 367 | 324 |
| 368 /* The current status of composition handling. */ | |
| 369 int composing; | |
| 370 | |
| 371 /* 1 iff the next character is a composition rule. */ | |
| 372 int composition_rule_follows; | |
| 373 | |
| 374 /* Information of compositions are stored here on decoding and set | |
| 375 in advance on encoding. */ | |
| 376 struct composition_data *cmp_data; | |
| 377 | |
| 378 /* Index to cmp_data->data for the first element for the current | |
| 379 composition. */ | |
| 380 int cmp_data_start; | |
| 381 | |
| 382 /* Index to cmp_data->data for the current element for the current | |
| 383 composition. */ | |
| 384 int cmp_data_index; | |
| 385 | |
| 386 /* Detailed information specific to each type of coding system. */ | 325 /* Detailed information specific to each type of coding system. */ |
| 387 union spec | 326 union |
| 388 { | 327 { |
| 389 struct iso2022_spec iso2022; | 328 struct iso_2022_spec iso_2022; |
| 390 struct ccl_spec ccl; /* Defined in ccl.h. */ | 329 struct ccl_spec *ccl; /* Defined in ccl.h. */ |
| 330 struct utf_16_spec utf_16; | |
| 331 int emacs_mule_full_support; | |
| 391 } spec; | 332 } spec; |
| 392 | 333 |
| 393 /* Index number of coding category of the coding system. */ | 334 int max_charset_id; |
| 394 int category_idx; | 335 char *safe_charsets; |
| 395 | 336 |
| 396 /* The following two members specify how characters 128..159 are | 337 /* The following two members specify how binary 8-bit code 128..255 |
| 397 represented in source and destination text respectively. 1 means | 338 are represented in source and destination text respectively. 1 |
| 398 they are represented by 2-byte sequence, 0 means they are | 339 means they are represented by 2-byte sequence, 0 means they are |
| 399 represented by 1-byte as is (see the comment in charset.h). */ | 340 represented by 1-byte as is (see the comment in character.h). */ |
| 400 unsigned src_multibyte : 1; | 341 unsigned src_multibyte : 1; |
| 401 unsigned dst_multibyte : 1; | 342 unsigned dst_multibyte : 1; |
| 402 | 343 |
| 403 /* How many heading bytes we can skip for decoding. This is set to | 344 /* How many heading bytes we can skip for decoding. This is set to |
| 404 -1 in setup_coding_system, and updated by detect_coding. So, | 345 -1 in setup_coding_system, and updated by detect_coding. So, |
| 405 when this is equal to the byte length of the text being | 346 when this is equal to the byte length of the text being |
| 406 converted, we can skip the actual conversion process. */ | 347 converted, we can skip the actual conversion process. */ |
| 407 int heading_ascii; | 348 int head_ascii; |
| 408 | 349 |
| 409 /* The following members are set by encoding/decoding routine. */ | 350 /* The following members are set by encoding/decoding routine. */ |
| 410 int produced, produced_char, consumed, consumed_char; | 351 EMACS_INT produced, produced_char, consumed, consumed_char; |
| 411 | 352 |
| 412 /* Number of error source data found in a decoding routine. */ | 353 /* Number of error source data found in a decoding routine. */ |
| 413 int errors; | 354 int errors; |
| 414 | 355 |
| 415 /* Finish status of code conversion. It should be one of macros | 356 /* Store the positions of error source data. */ |
| 416 CODING_FINISH_XXXX. */ | 357 EMACS_INT *error_positions; |
| 417 int result; | 358 |
| 418 | 359 /* Finish status of code conversion. */ |
| 419 /* If nonzero, suppress error notification. */ | 360 enum coding_result_code result; |
| 420 int suppress_error; | |
| 421 | 361 |
| 422 /* The following members are all Lisp symbols. We don't have to | 362 /* The following members are all Lisp symbols. We don't have to |
| 423 protect them from GC because the current garbage collection | 363 protect them from GC because the current garbage collection |
| 424 doesn't relocate Lisp symbols. But, when it is changed, we must | 364 doesn't relocate Lisp symbols. But, when it is changed, we must |
| 425 find a way to protect them. */ | 365 find a way to protect them. */ |
| 426 | 366 |
| 427 /* Backward pointer to the Lisp symbol of the coding system. */ | 367 EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes; |
| 428 Lisp_Object symbol; | 368 Lisp_Object src_object; |
| 429 | 369 const unsigned char *source; |
| 430 /* Lisp function (symbol) to be called after decoding to do | 370 |
| 431 additional conversion, or nil. */ | 371 EMACS_INT dst_pos, dst_pos_byte, dst_bytes; |
| 432 Lisp_Object post_read_conversion; | 372 Lisp_Object dst_object; |
| 433 | 373 unsigned char *destination; |
| 434 /* Lisp function (symbol) to be called before encoding to do | 374 |
| 435 additional conversion, or nil. */ | 375 int chars_at_source; |
| 436 Lisp_Object pre_write_conversion; | 376 |
| 437 | 377 /* If an element is non-negative, it is a character code. |
| 438 /* Character translation tables to look up, or nil. */ | 378 |
| 439 Lisp_Object translation_table_for_decode; | 379 If it is in the range -128..-1, it is a 8-bit character code |
| 440 Lisp_Object translation_table_for_encode; | 380 minus 256. |
| 381 | |
| 382 If it is less than -128, it specifies the start of an annotation | |
| 383 chunk. The length of the chunk is -128 minus the value of the | |
| 384 element. The following elements are OFFSET, ANNOTATION-TYPE, and | |
| 385 a sequence of actual data for the annotation. OFFSET is a | |
| 386 character position offset from dst_pos or src_pos, | |
| 387 ANNOTATION-TYPE specifies the meaning of the annotation and how to | |
| 388 handle the following data. */ | |
| 389 int *charbuf; | |
| 390 int charbuf_size, charbuf_used; | |
| 391 | |
| 392 /* Set to 1 if charbuf contains an annotation. */ | |
| 393 int annotated; | |
| 394 | |
| 395 unsigned char carryover[64]; | |
| 396 int carryover_bytes; | |
| 397 | |
| 398 int default_char; | |
| 399 | |
| 400 int (*detector) P_ ((struct coding_system *, | |
| 401 struct coding_detection_info *)); | |
| 402 void (*decoder) P_ ((struct coding_system *)); | |
| 403 int (*encoder) P_ ((struct coding_system *)); | |
| 441 }; | 404 }; |
| 442 | 405 |
| 443 #define CODING_REQUIRE_FLUSHING_MASK 1 | 406 /* Meanings of bits in the member `common_flags' of the structure |
| 444 #define CODING_REQUIRE_DECODING_MASK 2 | 407 coding_system. The lowest 8 bits are reserved for various kinds of |
| 445 #define CODING_REQUIRE_ENCODING_MASK 4 | 408 annotations (currently two of them are used). */ |
| 446 #define CODING_REQUIRE_DETECTION_MASK 8 | 409 #define CODING_ANNOTATION_MASK 0x00FF |
| 447 | 410 #define CODING_ANNOTATE_COMPOSITION_MASK 0x0001 |
| 448 /* Return 1 if the coding system CODING requires specific code to be | 411 #define CODING_ANNOTATE_DIRECTION_MASK 0x0002 |
| 412 #define CODING_ANNOTATE_CHARSET_MASK 0x0003 | |
| 413 #define CODING_FOR_UNIBYTE_MASK 0x0100 | |
| 414 #define CODING_REQUIRE_FLUSHING_MASK 0x0200 | |
| 415 #define CODING_REQUIRE_DECODING_MASK 0x0400 | |
| 416 #define CODING_REQUIRE_ENCODING_MASK 0x0800 | |
| 417 #define CODING_REQUIRE_DETECTION_MASK 0x1000 | |
| 418 #define CODING_RESET_AT_BOL_MASK 0x2000 | |
| 419 | |
| 420 /* Return 1 if the coding context CODING requires annotation | |
| 421 handling. */ | |
| 422 #define CODING_REQUIRE_ANNOTATION(coding) \ | |
| 423 ((coding)->common_flags & CODING_ANNOTATION_MASK) | |
| 424 | |
| 425 /* Return 1 if the coding context CODING prefers decoding into unibyte. */ | |
| 426 #define CODING_FOR_UNIBYTE(coding) \ | |
| 427 ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK) | |
| 428 | |
| 429 /* Return 1 if the coding context CODING requires specific code to be | |
| 449 attached at the tail of converted text. */ | 430 attached at the tail of converted text. */ |
| 450 #define CODING_REQUIRE_FLUSHING(coding) \ | 431 #define CODING_REQUIRE_FLUSHING(coding) \ |
| 451 ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) | 432 ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) |
| 452 | 433 |
| 453 /* Return 1 if the coding system CODING requires code conversion on | 434 /* Return 1 if the coding context CODING requires code conversion on |
| 454 decoding. */ | 435 decoding. */ |
| 455 #define CODING_REQUIRE_DECODING(coding) \ | 436 #define CODING_REQUIRE_DECODING(coding) \ |
| 456 ((coding)->dst_multibyte \ | 437 ((coding)->dst_multibyte \ |
| 457 || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) | 438 || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) |
| 458 | 439 |
| 459 /* Return 1 if the coding system CODING requires code conversion on | 440 |
| 441 /* Return 1 if the coding context CODING requires code conversion on | |
| 460 encoding. */ | 442 encoding. */ |
| 461 #define CODING_REQUIRE_ENCODING(coding) \ | 443 #define CODING_REQUIRE_ENCODING(coding) \ |
| 462 ((coding)->src_multibyte \ | 444 ((coding)->src_multibyte \ |
| 463 || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) | 445 || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK \ |
| 464 | 446 || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY) |
| 465 /* Return 1 if the coding system CODING requires some kind of code | 447 |
| 448 | |
| 449 /* Return 1 if the coding context CODING requires some kind of code | |
| 466 detection. */ | 450 detection. */ |
| 467 #define CODING_REQUIRE_DETECTION(coding) \ | 451 #define CODING_REQUIRE_DETECTION(coding) \ |
| 468 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) | 452 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) |
| 469 | 453 |
| 470 /* Return 1 if the coding system CODING requires code conversion on | 454 /* Return 1 if the coding context CODING requires code conversion on |
| 471 decoding or some kind of code detection. */ | 455 decoding or some kind of code detection. */ |
| 472 #define CODING_MAY_REQUIRE_DECODING(coding) \ | 456 #define CODING_MAY_REQUIRE_DECODING(coding) \ |
| 473 (CODING_REQUIRE_DECODING (coding) \ | 457 (CODING_REQUIRE_DECODING (coding) \ |
| 474 || CODING_REQUIRE_DETECTION (coding)) | 458 || CODING_REQUIRE_DETECTION (coding)) |
| 475 | 459 |
| 476 /* Index for each coding category in `coding_category_table' */ | |
| 477 #define CODING_CATEGORY_IDX_EMACS_MULE 0 | |
| 478 #define CODING_CATEGORY_IDX_SJIS 1 | |
| 479 #define CODING_CATEGORY_IDX_ISO_7 2 | |
| 480 #define CODING_CATEGORY_IDX_ISO_7_TIGHT 3 | |
| 481 #define CODING_CATEGORY_IDX_ISO_8_1 4 | |
| 482 #define CODING_CATEGORY_IDX_ISO_8_2 5 | |
| 483 #define CODING_CATEGORY_IDX_ISO_7_ELSE 6 | |
| 484 #define CODING_CATEGORY_IDX_ISO_8_ELSE 7 | |
| 485 #define CODING_CATEGORY_IDX_CCL 8 | |
| 486 #define CODING_CATEGORY_IDX_BIG5 9 | |
| 487 #define CODING_CATEGORY_IDX_UTF_8 10 | |
| 488 #define CODING_CATEGORY_IDX_UTF_16_BE 11 | |
| 489 #define CODING_CATEGORY_IDX_UTF_16_LE 12 | |
| 490 #define CODING_CATEGORY_IDX_RAW_TEXT 13 | |
| 491 #define CODING_CATEGORY_IDX_BINARY 14 | |
| 492 #define CODING_CATEGORY_IDX_MAX 15 | |
| 493 | |
| 494 /* Definitions of flag bits returned by the function | |
| 495 detect_coding_mask (). */ | |
| 496 #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE) | |
| 497 #define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS) | |
| 498 #define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7) | |
| 499 #define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT) | |
| 500 #define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1) | |
| 501 #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2) | |
| 502 #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE) | |
| 503 #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE) | |
| 504 #define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL) | |
| 505 #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5) | |
| 506 #define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8) | |
| 507 #define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE) | |
| 508 #define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE) | |
| 509 #define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT) | |
| 510 #define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY) | |
| 511 | |
| 512 /* This value is returned if detect_coding_mask () finds nothing other | |
| 513 than ASCII characters. */ | |
| 514 #define CODING_CATEGORY_MASK_ANY \ | |
| 515 ( CODING_CATEGORY_MASK_EMACS_MULE \ | |
| 516 | CODING_CATEGORY_MASK_SJIS \ | |
| 517 | CODING_CATEGORY_MASK_ISO_7 \ | |
| 518 | CODING_CATEGORY_MASK_ISO_7_TIGHT \ | |
| 519 | CODING_CATEGORY_MASK_ISO_8_1 \ | |
| 520 | CODING_CATEGORY_MASK_ISO_8_2 \ | |
| 521 | CODING_CATEGORY_MASK_ISO_7_ELSE \ | |
| 522 | CODING_CATEGORY_MASK_ISO_8_ELSE \ | |
| 523 | CODING_CATEGORY_MASK_CCL \ | |
| 524 | CODING_CATEGORY_MASK_BIG5 \ | |
| 525 | CODING_CATEGORY_MASK_UTF_8 \ | |
| 526 | CODING_CATEGORY_MASK_UTF_16_BE \ | |
| 527 | CODING_CATEGORY_MASK_UTF_16_LE) | |
| 528 | |
| 529 #define CODING_CATEGORY_MASK_ISO_7BIT \ | |
| 530 (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT) | |
| 531 | |
| 532 #define CODING_CATEGORY_MASK_ISO_8BIT \ | |
| 533 (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2) | |
| 534 | |
| 535 #define CODING_CATEGORY_MASK_ISO_SHIFT \ | |
| 536 (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE) | |
| 537 | |
| 538 #define CODING_CATEGORY_MASK_ISO \ | |
| 539 ( CODING_CATEGORY_MASK_ISO_7BIT \ | |
| 540 | CODING_CATEGORY_MASK_ISO_SHIFT \ | |
| 541 | CODING_CATEGORY_MASK_ISO_8BIT) | |
| 542 | |
| 543 #define CODING_CATEGORY_MASK_UTF_16_BE_LE \ | |
| 544 (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE) | |
| 545 | |
| 546 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and | 460 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and |
| 547 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding | 461 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding |
| 548 system. C1 and C2 are the 1st and 2nd position codes of Emacs' | 462 system. C1 and C2 are the 1st and 2nd position codes of Emacs' |
| 549 internal format. */ | 463 internal format. */ |
| 550 | 464 |
| 551 #define DECODE_SJIS(s1, s2, c1, c2) \ | 465 #define SJIS_TO_JIS(code) \ |
| 552 do { \ | 466 do { \ |
| 553 if (s2 >= 0x9F) \ | 467 int s1, s2, j1, j2; \ |
| 554 c1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \ | 468 \ |
| 555 c2 = s2 - 0x7E; \ | 469 s1 = (code) >> 8, s2 = (code) & 0xFF; \ |
| 556 else \ | 470 \ |
| 557 c1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \ | 471 if (s2 >= 0x9F) \ |
| 558 c2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F); \ | 472 (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \ |
| 473 j2 = s2 - 0x7E); \ | |
| 474 else \ | |
| 475 (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \ | |
| 476 j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \ | |
| 477 (code) = (j1 << 8) | j2; \ | |
| 559 } while (0) | 478 } while (0) |
| 560 | 479 |
| 561 #define ENCODE_SJIS(c1, c2, s1, s2) \ | 480 |
| 481 #define JIS_TO_SJIS(code) \ | |
| 562 do { \ | 482 do { \ |
| 563 if (c1 & 1) \ | 483 int s1, s2, j1, j2; \ |
| 564 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1), \ | 484 \ |
| 565 s2 = c2 + ((c2 >= 0x60) ? 0x20 : 0x1F); \ | 485 j1 = (code) >> 8, j2 = (code) & 0xFF; \ |
| 486 if (j1 & 1) \ | |
| 487 (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \ | |
| 488 s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \ | |
| 566 else \ | 489 else \ |
| 567 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0), \ | 490 (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \ |
| 568 s2 = c2 + 0x7E; \ | 491 s2 = j2 + 0x7E); \ |
| 492 (code) = (s1 << 8) | s2; \ | |
| 569 } while (0) | 493 } while (0) |
| 494 | |
| 570 | 495 |
| 571 /* Encode the file name NAME using the specified coding system | 496 /* Encode the file name NAME using the specified coding system |
| 572 for file names, if any. */ | 497 for file names, if any. */ |
| 573 #define ENCODE_FILE(name) \ | 498 #define ENCODE_FILE(name) \ |
| 574 (! NILP (Vfile_name_coding_system) \ | 499 (! NILP (Vfile_name_coding_system) \ |
| 577 : (! NILP (Vdefault_file_name_coding_system) \ | 502 : (! NILP (Vdefault_file_name_coding_system) \ |
| 578 && XFASTINT (Vdefault_file_name_coding_system) != 0 \ | 503 && XFASTINT (Vdefault_file_name_coding_system) != 0 \ |
| 579 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ | 504 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ |
| 580 : name)) | 505 : name)) |
| 581 | 506 |
| 507 | |
| 582 /* Decode the file name NAME using the specified coding system | 508 /* Decode the file name NAME using the specified coding system |
| 583 for file names, if any. */ | 509 for file names, if any. */ |
| 584 #define DECODE_FILE(name) \ | 510 #define DECODE_FILE(name) \ |
| 585 (! NILP (Vfile_name_coding_system) \ | 511 (! NILP (Vfile_name_coding_system) \ |
| 586 && XFASTINT (Vfile_name_coding_system) != 0 \ | 512 && XFASTINT (Vfile_name_coding_system) != 0 \ |
| 588 : (! NILP (Vdefault_file_name_coding_system) \ | 514 : (! NILP (Vdefault_file_name_coding_system) \ |
| 589 && XFASTINT (Vdefault_file_name_coding_system) != 0 \ | 515 && XFASTINT (Vdefault_file_name_coding_system) != 0 \ |
| 590 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ | 516 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ |
| 591 : name)) | 517 : name)) |
| 592 | 518 |
| 519 | |
| 593 #ifdef WINDOWSNT | 520 #ifdef WINDOWSNT |
| 594 /* Encode the string STR using the specified coding system | 521 /* Encode the string STR using the specified coding system |
| 595 for w32 system functions, if any. */ | 522 for w32 system functions, if any. */ |
| 596 #define ENCODE_SYSTEM(str) \ | 523 #define ENCODE_SYSTEM(str) \ |
| 597 (! NILP (Vlocale_coding_system) \ | 524 (! NILP (Vlocale_coding_system) \ |
| 598 && XFASTINT (Vlocale_coding_system) != 0 \ | 525 && XFASTINT (Vlocale_coding_system) != 0 \ |
| 599 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ | 526 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ |
| 600 : str) | 527 : str) |
| 601 | 528 |
| 602 /* Decode the string STR using the specified coding system | 529 /* Decode the string STR using the specified coding system |
| 603 for w32 system functions, if any. */ | 530 for w32 system functions, if any. */ |
| 604 #define DECODE_SYSTEM(name) \ | 531 #define DECODE_SYSTEM(name) \ |
| 605 (! NILP (Vlocale_coding_system) \ | 532 (! NILP (Vlocale_coding_system) \ |
| 606 && XFASTINT (Vlocale_coding_system) != 0 \ | 533 && XFASTINT (Vlocale_coding_system) != 0 \ |
| 607 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ | 534 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ |
| 608 : str) | 535 : str) |
| 609 | 536 |
| 610 #else /* WINDOWSNT */ | 537 #else /* WINDOWSNT */ |
| 611 | 538 |
| 612 #define ENCODE_SYSTEM(str) string_make_unibyte(str) | 539 #define ENCODE_SYSTEM(str) string_make_unibyte(str) |
| 613 #define DECODE_SYSTEM(name) name | 540 #define DECODE_SYSTEM(name) name |
| 614 | 541 |
| 615 #endif /* !WINDOWSNT */ | 542 #endif /* !WINDOWSNT */ |
| 616 | 543 |
| 617 #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1) | |
| 618 | |
| 619 /* Extern declarations. */ | 544 /* Extern declarations. */ |
| 620 extern int decode_coding P_ ((struct coding_system *, const unsigned char *, | 545 extern Lisp_Object make_conversion_work_buffer P_ ((int, int)); |
| 621 unsigned char *, int, int)); | |
| 622 extern int encode_coding P_ ((struct coding_system *, const unsigned char *, | |
| 623 unsigned char *, int, int)); | |
| 624 extern void coding_save_composition P_ ((struct coding_system *, int, int, | |
| 625 Lisp_Object)); | |
| 626 extern void coding_free_composition_data P_ ((struct coding_system *)); | |
| 627 extern void coding_adjust_composition_offset P_ ((struct coding_system *, | |
| 628 int)); | |
| 629 extern void coding_allocate_composition_data P_ ((struct coding_system *, | |
| 630 int)); | |
| 631 extern void coding_restore_composition P_ ((struct coding_system *, | |
| 632 Lisp_Object)); | |
| 633 extern int code_convert_region P_ ((int, int, int, int, struct coding_system *, | |
| 634 int, int)); | |
| 635 extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object, | |
| 636 struct coding_system *, | |
| 637 int)); | |
| 638 extern int decoding_buffer_size P_ ((struct coding_system *, int)); | 546 extern int decoding_buffer_size P_ ((struct coding_system *, int)); |
| 639 extern int encoding_buffer_size P_ ((struct coding_system *, int)); | 547 extern int encoding_buffer_size P_ ((struct coding_system *, int)); |
| 640 extern void detect_coding P_ ((struct coding_system *, const unsigned char *, | 548 extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *)); |
| 641 int)); | 549 extern void detect_coding P_ ((struct coding_system *)); |
| 642 extern void detect_eol P_ ((struct coding_system *, const unsigned char *, | 550 extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object, |
| 643 int)); | 551 Lisp_Object, Lisp_Object, |
| 644 extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *)); | 552 int, int)); |
| 645 extern Lisp_Object code_convert_string P_ ((Lisp_Object, | 553 extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object, |
| 646 struct coding_system *, int, int)); | 554 Lisp_Object, int, int, int)); |
| 647 extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object, | |
| 648 Lisp_Object, int)); | |
| 649 extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object, | 555 extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object, |
| 650 int)); | 556 int)); |
| 651 extern void setup_raw_text_coding_system P_ ((struct coding_system *)); | 557 extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object)); |
| 652 extern Lisp_Object encode_coding_string P_ ((Lisp_Object, | 558 extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object)); |
| 653 struct coding_system *, int)); | 559 |
| 654 extern Lisp_Object decode_coding_string P_ ((Lisp_Object, | 560 extern int decode_coding_gap P_ ((struct coding_system *, |
| 655 struct coding_system *, int)); | 561 EMACS_INT, EMACS_INT)); |
| 562 extern int encode_coding_gap P_ ((struct coding_system *, | |
| 563 EMACS_INT, EMACS_INT)); | |
| 564 extern void decode_coding_object P_ ((struct coding_system *, | |
| 565 Lisp_Object, EMACS_INT, EMACS_INT, | |
| 566 EMACS_INT, EMACS_INT, Lisp_Object)); | |
| 567 extern void encode_coding_object P_ ((struct coding_system *, | |
| 568 Lisp_Object, EMACS_INT, EMACS_INT, | |
| 569 EMACS_INT, EMACS_INT, Lisp_Object)); | |
| 570 | |
| 571 #define decode_coding_region(coding, from, to) \ | |
| 572 decode_coding_object (coding, Fcurrent_buffer (), \ | |
| 573 from, CHAR_TO_BYTE (from), \ | |
| 574 to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) | |
| 575 | |
| 576 | |
| 577 #define encode_coding_region(coding, from, to) \ | |
| 578 encode_coding_object (coding, Fcurrent_buffer (), \ | |
| 579 from, CHAR_TO_BYTE (from), \ | |
| 580 to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) | |
| 581 | |
| 582 | |
| 583 #define decode_coding_string(coding, string, nocopy) \ | |
| 584 decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ | |
| 585 STRING_BYTES (XSTRING (string)), Qt) | |
| 586 | |
| 587 #define encode_coding_string(coding, string, nocopy) \ | |
| 588 (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ | |
| 589 STRING_BYTES (XSTRING (string)), Qt), \ | |
| 590 (coding)->dst_object) | |
| 591 | |
| 592 | |
| 593 #define decode_coding_c_string(coding, src, bytes, dst_object) \ | |
| 594 do { \ | |
| 595 (coding)->source = (src); \ | |
| 596 (coding)->src_chars = (coding)->src_bytes = (bytes); \ | |
| 597 decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \ | |
| 598 (dst_object)); \ | |
| 599 } while (0) | |
| 600 | |
| 601 | |
| 602 extern Lisp_Object preferred_coding_system P_ (()); | |
| 603 | |
| 604 | |
| 605 extern Lisp_Object Qutf_8, Qutf_8_emacs; | |
| 606 | |
| 656 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; | 607 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; |
| 657 extern Lisp_Object Qraw_text, Qemacs_mule; | 608 extern Lisp_Object Qcoding_system_p; |
| 609 extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided; | |
| 610 extern Lisp_Object Qiso_2022; | |
| 658 extern Lisp_Object Qbuffer_file_coding_system; | 611 extern Lisp_Object Qbuffer_file_coding_system; |
| 659 extern Lisp_Object Vcoding_category_list; | 612 |
| 660 extern Lisp_Object Qutf_8; | 613 extern Lisp_Object Qunix, Qdos, Qmac; |
| 661 | 614 |
| 662 extern Lisp_Object Qtranslation_table; | 615 extern Lisp_Object Qtranslation_table; |
| 663 extern Lisp_Object Qtranslation_table_id; | 616 extern Lisp_Object Qtranslation_table_id; |
| 664 | 617 |
| 665 /* Mnemonic strings to indicate each type of end-of-line. */ | 618 /* Mnemonic strings to indicate each type of end-of-line. */ |
| 700 /* Coding-system of what is sent from terminal keyboard. This | 653 /* Coding-system of what is sent from terminal keyboard. This |
| 701 structure contains information of a coding-system specified by the | 654 structure contains information of a coding-system specified by the |
| 702 function `set-keyboard-coding-system'. */ | 655 function `set-keyboard-coding-system'. */ |
| 703 extern struct coding_system keyboard_coding; | 656 extern struct coding_system keyboard_coding; |
| 704 | 657 |
| 705 /* Default coding system to be used to write a file. */ | |
| 706 extern struct coding_system default_buffer_file_coding; | |
| 707 | |
| 708 /* Default coding systems used for process I/O. */ | 658 /* Default coding systems used for process I/O. */ |
| 709 extern Lisp_Object Vdefault_process_coding_system; | 659 extern Lisp_Object Vdefault_process_coding_system; |
| 710 | 660 |
| 711 /* Function to call to force a user to force select a proper coding | 661 /* Function to call to force a user to force select a propert coding |
| 712 system. */ | 662 system. */ |
| 713 extern Lisp_Object Vselect_safe_coding_system_function; | 663 extern Lisp_Object Vselect_safe_coding_system_function; |
| 714 | 664 |
| 715 /* If nonzero, on writing a file, Vselect_safe_coding_system_function | 665 /* If nonzero, on writing a file, Vselect_safe_coding_system_function |
| 716 is called even if Vcoding_system_for_write is non-nil. */ | 666 is called even if Vcoding_system_for_write is non-nil. */ |
| 726 #endif | 676 #endif |
| 727 | 677 |
| 728 /* Error signaled when there's a problem with detecting coding system */ | 678 /* Error signaled when there's a problem with detecting coding system */ |
| 729 extern Lisp_Object Qcoding_system_error; | 679 extern Lisp_Object Qcoding_system_error; |
| 730 | 680 |
| 681 extern char emacs_mule_bytes[256]; | |
| 682 extern int emacs_mule_string_char P_ ((unsigned char *)); | |
| 683 | |
| 731 #endif /* EMACS_CODING_H */ | 684 #endif /* EMACS_CODING_H */ |
