Mercurial > emacs
comparison src/coding.c @ 89732:8acde12aba89
(get_translation_table): New function.
(translate_chars): Fix the bug of skipping annotation data.
(decode_coding): Utilize get_translation_table.
(encode_coding): Likewise.
(char_encodable_p): Translate char if necessary.
(Funencodable_char_position): Likewise.
(Ffind_coding_systems_region_internal): Set up translation table
for encode in a coding system attribute vector in advance.
(Fcheck_coding_systems_region): Likewise.
(Fdefine_coding_system_internal): Allow a symbol as translation
table. For shift-jis type coding system, allow 4th charset.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Sun, 25 Jan 2004 07:30:47 +0000 |
| parents | d8fcefca5cf6 |
| children | 293c9235be3f |
comparison
equal
deleted
inserted
replaced
| 89731:e4b184cef172 | 89732:8acde12aba89 |
|---|---|
| 5482 coding->produced -= n; | 5482 coding->produced -= n; |
| 5483 coding->produced_char -= n; | 5483 coding->produced_char -= n; |
| 5484 } | 5484 } |
| 5485 } | 5485 } |
| 5486 | 5486 |
| 5487 | |
| 5488 /* Return a translation table from coding system attribute vector ATTRS | |
| 5489 for encoding (ENCODEP is nonzero) or decoding (ENCODEP is zero). */ | |
| 5490 | |
| 5491 static INLINE | |
| 5492 get_translation_table (attrs, encodep) | |
| 5493 { | |
| 5494 Lisp_Object standard, translation_table; | |
| 5495 | |
| 5496 if (encodep) | |
| 5497 translation_table = CODING_ATTR_ENCODE_TBL (attrs), | |
| 5498 standard = Vstandard_translation_table_for_encode; | |
| 5499 else | |
| 5500 translation_table = CODING_ATTR_DECODE_TBL (attrs), | |
| 5501 standard = Vstandard_translation_table_for_decode; | |
| 5502 if (! NILP (translation_table) && SYMBOLP (translation_table)) | |
| 5503 translation_table = Fget (translation_table, Qtranslation_table); | |
| 5504 if (NILP (translation_table)) | |
| 5505 translation_table = standard; | |
| 5506 if (! CHAR_TABLE_P (translation_table)) | |
| 5507 translation_table = Qnil; | |
| 5508 return translation_table; | |
| 5509 } | |
| 5510 | |
| 5511 | |
| 5487 static void | 5512 static void |
| 5488 translate_chars (coding, table) | 5513 translate_chars (coding, table) |
| 5489 struct coding_system *coding; | 5514 struct coding_system *coding; |
| 5490 Lisp_Object table; | 5515 Lisp_Object table; |
| 5491 { | 5516 { |
| 5498 | 5523 |
| 5499 while (charbuf < charbuf_end) | 5524 while (charbuf < charbuf_end) |
| 5500 { | 5525 { |
| 5501 c = *charbuf; | 5526 c = *charbuf; |
| 5502 if (c < 0) | 5527 if (c < 0) |
| 5503 charbuf += c; | 5528 charbuf += -c; |
| 5504 else | 5529 else |
| 5505 *charbuf++ = translate_char (table, c); | 5530 *charbuf++ = translate_char (table, c); |
| 5506 } | 5531 } |
| 5507 } | 5532 } |
| 5508 | 5533 |
| 5838 decode_coding (coding) | 5863 decode_coding (coding) |
| 5839 struct coding_system *coding; | 5864 struct coding_system *coding; |
| 5840 { | 5865 { |
| 5841 Lisp_Object attrs; | 5866 Lisp_Object attrs; |
| 5842 Lisp_Object undo_list; | 5867 Lisp_Object undo_list; |
| 5868 Lisp_Object translation_table; | |
| 5843 | 5869 |
| 5844 if (BUFFERP (coding->src_object) | 5870 if (BUFFERP (coding->src_object) |
| 5845 && coding->src_pos > 0 | 5871 && coding->src_pos > 0 |
| 5846 && coding->src_pos < GPT | 5872 && coding->src_pos < GPT |
| 5847 && coding->src_pos + coding->src_chars > GPT) | 5873 && coding->src_pos + coding->src_chars > GPT) |
| 5865 coding->errors = 0; | 5891 coding->errors = 0; |
| 5866 | 5892 |
| 5867 ALLOC_CONVERSION_WORK_AREA (coding); | 5893 ALLOC_CONVERSION_WORK_AREA (coding); |
| 5868 | 5894 |
| 5869 attrs = CODING_ID_ATTRS (coding->id); | 5895 attrs = CODING_ID_ATTRS (coding->id); |
| 5896 translation_table = get_translation_table (attrs, 1); | |
| 5870 | 5897 |
| 5871 do | 5898 do |
| 5872 { | 5899 { |
| 5873 coding_set_source (coding); | 5900 coding_set_source (coding); |
| 5874 coding->annotated = 0; | 5901 coding->annotated = 0; |
| 5875 (*(coding->decoder)) (coding); | 5902 (*(coding->decoder)) (coding); |
| 5876 if (!NILP (CODING_ATTR_DECODE_TBL (attrs))) | 5903 if (!NILP (translation_table)) |
| 5877 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs)); | 5904 translate_chars (coding, translation_table); |
| 5878 else if (!NILP (Vstandard_translation_table_for_decode)) | |
| 5879 translate_chars (coding, Vstandard_translation_table_for_decode); | |
| 5880 coding_set_destination (coding); | 5905 coding_set_destination (coding); |
| 5881 produce_chars (coding); | 5906 produce_chars (coding); |
| 5882 if (coding->annotated) | 5907 if (coding->annotated) |
| 5883 produce_annotation (coding); | 5908 produce_annotation (coding); |
| 5884 } | 5909 } |
| 6165 static int | 6190 static int |
| 6166 encode_coding (coding) | 6191 encode_coding (coding) |
| 6167 struct coding_system *coding; | 6192 struct coding_system *coding; |
| 6168 { | 6193 { |
| 6169 Lisp_Object attrs; | 6194 Lisp_Object attrs; |
| 6195 Lisp_Object translation_table; | |
| 6170 | 6196 |
| 6171 attrs = CODING_ID_ATTRS (coding->id); | 6197 attrs = CODING_ID_ATTRS (coding->id); |
| 6198 translation_table = get_translation_table (attrs, 1); | |
| 6172 | 6199 |
| 6173 if (BUFFERP (coding->dst_object)) | 6200 if (BUFFERP (coding->dst_object)) |
| 6174 { | 6201 { |
| 6175 set_buffer_internal (XBUFFER (coding->dst_object)); | 6202 set_buffer_internal (XBUFFER (coding->dst_object)); |
| 6176 coding->dst_multibyte | 6203 coding->dst_multibyte |
| 6186 | 6213 |
| 6187 do { | 6214 do { |
| 6188 coding_set_source (coding); | 6215 coding_set_source (coding); |
| 6189 consume_chars (coding); | 6216 consume_chars (coding); |
| 6190 | 6217 |
| 6191 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs))) | 6218 if (!NILP (translation_table)) |
| 6192 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs)); | 6219 translate_chars (coding, translation_table); |
| 6193 else if (!NILP (Vstandard_translation_table_for_encode)) | |
| 6194 translate_chars (coding, Vstandard_translation_table_for_encode); | |
| 6195 | 6220 |
| 6196 coding_set_destination (coding); | 6221 coding_set_destination (coding); |
| 6197 (*(coding->encoder)) (coding); | 6222 (*(coding->encoder)) (coding); |
| 6198 } while (coding->consumed_char < coding->src_chars); | 6223 } while (coding->consumed_char < coding->src_chars); |
| 6199 | 6224 |
| 7070 int c; | 7095 int c; |
| 7071 Lisp_Object attrs; | 7096 Lisp_Object attrs; |
| 7072 { | 7097 { |
| 7073 Lisp_Object tail; | 7098 Lisp_Object tail; |
| 7074 struct charset *charset; | 7099 struct charset *charset; |
| 7075 | 7100 Lisp_Object translation_table; |
| 7101 | |
| 7102 translation_table = CODING_ATTR_TRANS_TBL (attrs); | |
| 7103 if (CHAR_TABLE_P (translation_table)) | |
| 7104 c = translate_char (translation_table, c); | |
| 7076 for (tail = CODING_ATTR_CHARSET_LIST (attrs); | 7105 for (tail = CODING_ATTR_CHARSET_LIST (attrs); |
| 7077 CONSP (tail); tail = XCDR (tail)) | 7106 CONSP (tail); tail = XCDR (tail)) |
| 7078 { | 7107 { |
| 7079 charset = CHARSET_FROM_ID (XINT (XCAR (tail))); | 7108 charset = CHARSET_FROM_ID (XINT (XCAR (tail))); |
| 7080 if (CHAR_CHARSET_P (c, charset)) | 7109 if (CHAR_CHARSET_P (c, charset)) |
| 7141 Lisp_Object attrs; | 7170 Lisp_Object attrs; |
| 7142 | 7171 |
| 7143 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); | 7172 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); |
| 7144 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) | 7173 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) |
| 7145 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided)) | 7174 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided)) |
| 7146 coding_attrs_list = Fcons (attrs, coding_attrs_list); | 7175 { |
| 7176 ASET (attrs, coding_attr_trans_tbl, | |
| 7177 get_translation_table (attrs, 1)); | |
| 7178 coding_attrs_list = Fcons (attrs, coding_attrs_list); | |
| 7179 } | |
| 7147 } | 7180 } |
| 7148 | 7181 |
| 7149 if (STRINGP (start)) | 7182 if (STRINGP (start)) |
| 7150 p = pbeg = SDATA (start); | 7183 p = pbeg = SDATA (start); |
| 7151 else | 7184 else |
| 7222 (start, end, coding_system, count, string) | 7255 (start, end, coding_system, count, string) |
| 7223 Lisp_Object start, end, coding_system, count, string; | 7256 Lisp_Object start, end, coding_system, count, string; |
| 7224 { | 7257 { |
| 7225 int n; | 7258 int n; |
| 7226 struct coding_system coding; | 7259 struct coding_system coding; |
| 7227 Lisp_Object attrs, charset_list; | 7260 Lisp_Object attrs, charset_list, translation_table; |
| 7228 Lisp_Object positions; | 7261 Lisp_Object positions; |
| 7229 int from, to; | 7262 int from, to; |
| 7230 const unsigned char *p, *stop, *pend; | 7263 const unsigned char *p, *stop, *pend; |
| 7231 int ascii_compatible; | 7264 int ascii_compatible; |
| 7232 | 7265 |
| 7234 attrs = CODING_ID_ATTRS (coding.id); | 7267 attrs = CODING_ID_ATTRS (coding.id); |
| 7235 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text)) | 7268 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text)) |
| 7236 return Qnil; | 7269 return Qnil; |
| 7237 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); | 7270 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); |
| 7238 charset_list = CODING_ATTR_CHARSET_LIST (attrs); | 7271 charset_list = CODING_ATTR_CHARSET_LIST (attrs); |
| 7272 translation_table = get_translation_table (attrs, 1); | |
| 7239 | 7273 |
| 7240 if (NILP (string)) | 7274 if (NILP (string)) |
| 7241 { | 7275 { |
| 7242 validate_region (&start, &end); | 7276 validate_region (&start, &end); |
| 7243 from = XINT (start); | 7277 from = XINT (start); |
| 7295 p = GAP_END_ADDR; | 7329 p = GAP_END_ADDR; |
| 7296 } | 7330 } |
| 7297 | 7331 |
| 7298 c = STRING_CHAR_ADVANCE (p); | 7332 c = STRING_CHAR_ADVANCE (p); |
| 7299 if (! (ASCII_CHAR_P (c) && ascii_compatible) | 7333 if (! (ASCII_CHAR_P (c) && ascii_compatible) |
| 7300 && ! char_charset (c, charset_list, NULL)) | 7334 && ! char_charset (translate_char (translation_table, c), |
| 7335 charset_list, NULL)) | |
| 7301 { | 7336 { |
| 7302 positions = Fcons (make_number (from), positions); | 7337 positions = Fcons (make_number (from), positions); |
| 7303 n--; | 7338 n--; |
| 7304 if (n == 0) | 7339 if (n == 0) |
| 7305 break; | 7340 break; |
| 7336 Lisp_Object list; | 7371 Lisp_Object list; |
| 7337 EMACS_INT start_byte, end_byte; | 7372 EMACS_INT start_byte, end_byte; |
| 7338 int pos; | 7373 int pos; |
| 7339 const unsigned char *p, *pbeg, *pend; | 7374 const unsigned char *p, *pbeg, *pend; |
| 7340 int c; | 7375 int c; |
| 7341 Lisp_Object tail, elt; | 7376 Lisp_Object tail, elt, attrs; |
| 7342 | 7377 |
| 7343 if (STRINGP (start)) | 7378 if (STRINGP (start)) |
| 7344 { | 7379 { |
| 7345 if (!STRING_MULTIBYTE (start) | 7380 if (!STRING_MULTIBYTE (start) |
| 7346 && SCHARS (start) != SBYTES (start)) | 7381 && SCHARS (start) != SBYTES (start)) |
| 7374 | 7409 |
| 7375 list = Qnil; | 7410 list = Qnil; |
| 7376 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail)) | 7411 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail)) |
| 7377 { | 7412 { |
| 7378 elt = XCAR (tail); | 7413 elt = XCAR (tail); |
| 7379 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0), | 7414 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); |
| 7380 Qnil)), | 7415 ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1)); |
| 7381 list); | 7416 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list); |
| 7382 } | 7417 } |
| 7383 | 7418 |
| 7384 if (STRINGP (start)) | 7419 if (STRINGP (start)) |
| 7385 p = pbeg = SDATA (start); | 7420 p = pbeg = SDATA (start); |
| 7386 else | 7421 else |
| 8131 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; | 8166 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; |
| 8132 | 8167 |
| 8133 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p]; | 8168 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p]; |
| 8134 | 8169 |
| 8135 val = args[coding_arg_decode_translation_table]; | 8170 val = args[coding_arg_decode_translation_table]; |
| 8136 if (! NILP (val)) | 8171 if (! CHAR_TABLE_P (val)) |
| 8137 CHECK_CHAR_TABLE (val); | 8172 CHECK_SYMBOL (val); |
| 8138 CODING_ATTR_DECODE_TBL (attrs) = val; | 8173 CODING_ATTR_DECODE_TBL (attrs) = val; |
| 8139 | 8174 |
| 8140 val = args[coding_arg_encode_translation_table]; | 8175 val = args[coding_arg_encode_translation_table]; |
| 8141 if (! NILP (val)) | 8176 if (! CHAR_TABLE_P (val)) |
| 8142 CHECK_CHAR_TABLE (val); | 8177 CHECK_SYMBOL (val); |
| 8143 CODING_ATTR_ENCODE_TBL (attrs) = val; | 8178 CODING_ATTR_ENCODE_TBL (attrs) = val; |
| 8144 | 8179 |
| 8145 val = args[coding_arg_post_read_conversion]; | 8180 val = args[coding_arg_post_read_conversion]; |
| 8146 CHECK_SYMBOL (val); | 8181 CHECK_SYMBOL (val); |
| 8147 CODING_ATTR_POST_READ (attrs) = val; | 8182 CODING_ATTR_POST_READ (attrs) = val; |
| 8413 else if (EQ (coding_type, Qshift_jis)) | 8448 else if (EQ (coding_type, Qshift_jis)) |
| 8414 { | 8449 { |
| 8415 | 8450 |
| 8416 struct charset *charset; | 8451 struct charset *charset; |
| 8417 | 8452 |
| 8418 if (XINT (Flength (charset_list)) != 3) | 8453 if (XINT (Flength (charset_list)) != 3 |
| 8419 error ("There should be just three charsets"); | 8454 || XINT (Flength (charset_list)) != 4) |
| 8455 error ("There should be three or four charsets"); | |
| 8420 | 8456 |
| 8421 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 8457 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 8422 if (CHARSET_DIMENSION (charset) != 1) | 8458 if (CHARSET_DIMENSION (charset) != 1) |
| 8423 error ("Dimension of charset %s is not one", | 8459 error ("Dimension of charset %s is not one", |
| 8424 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); | 8460 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); |
| 8427 | 8463 |
| 8428 charset_list = XCDR (charset_list); | 8464 charset_list = XCDR (charset_list); |
| 8429 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 8465 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 8430 if (CHARSET_DIMENSION (charset) != 1) | 8466 if (CHARSET_DIMENSION (charset) != 1) |
| 8431 error ("Dimension of charset %s is not one", | 8467 error ("Dimension of charset %s is not one", |
| 8468 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); | |
| 8469 | |
| 8470 charset_list = XCDR (charset_list); | |
| 8471 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | |
| 8472 if (CHARSET_DIMENSION (charset) != 2) | |
| 8473 error ("Dimension of charset %s is not two", | |
| 8432 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); | 8474 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); |
| 8433 | 8475 |
| 8434 charset_list = XCDR (charset_list); | 8476 charset_list = XCDR (charset_list); |
| 8435 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 8477 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 8436 if (CHARSET_DIMENSION (charset) != 2) | 8478 if (CHARSET_DIMENSION (charset) != 2) |
