comparison src/coding.c @ 29005:b396df3a5181

(ONE_MORE_BYTE, TWO_MORE_BYTES): Set coding->result to CODING_FINISH_INSUFFICIENT_SRC if there's not enough source. (ONE_MORE_CHAR, EMIT_CHAR, EMIT_ONE_BYTE, EMIT_TWO_BYTES, EMIT_BYTES): New macros. (THREE_MORE_BYTES, DECODE_CHARACTER_ASCII, DECODE_CHARACTER_DIMENSION1, DECODE_CHARACTER_DIMENSION2): These macros deleted. (CHECK_CODE_RANGE_A0_FF): This macro deleted. (detect_coding_emacs_mule): Use UNIBYTE_STR_AS_MULTIBYTE_P to check the validity of a multibyte sequence. (decode_coding_emacs_mule): New function. (encode_coding_emacs_mule): New macro. (detect_coding_iso2022): Use ONE_MORE_BYTE to fetch a byte from the source. (DECODE_ISO_CHARACTER): Just return a character code. (DECODE_COMPOSITION_START): Set coding->result instead of result. (decode_coding_iso2022, decode_coding_sjis_big5, decode_eol): Use EMIT_CHAR to produce decoded characters. Exit the loop only by macros ONE_MORE_BYTE or EMIT_CHAR. Don't handle the case of last block here. (ENCODE_ISO_CHARACTER): Don't translate character here. Produce only position codes for an invalid character. (encode_designation_at_bol): Return new destination pointer. 5th arg DSTP is changed to DST. (encode_coding_iso2022, decode_coding_sjis_big5): Get a character from the source by ONE_MORE_CHAR. Don't handle the case of last block here. (DECODE_SJIS_BIG5_CHARACTER, ENCODE_SJIS_BIG5_CHARACTER): These macros deleted. (detect_coding_sjis, detect_coding_big5, detect_coding_utf_8, detect_coding_utf_16, detect_coding_ccl): Use ONE_MORE_BYTE and TWO_MORE_BYTES to fetch a byte from the source. (encode_eol): Pay attention to coding->src_multibyte. (detect_coding, detect_eol): Preserve members src_multibyte and dst_multibyte. (DECODING_BUFFER_MAG): Return 2 even for coding_type_raw_text. (encoding_buffer_size): Set magnification to 3 for all coding systems that require encoding. (ccl_coding_driver): For decoding, be sure that the result is a valid multibyte sequence. (decode_coding): Initialize coding->errors and coding->result. 
For emacs-mule, call decode_coding_emacs_mule. For no-conversion and raw-text, always call decode_eol. Handle the case of last block here. If not coding->dst_multibyte, convert the resulting sequence to unibyte. (encode_coding): Initialize coding->errors and coding->result. For emacs-mule, call encode_coding_emacs_mule. For no-conversion and raw-text, always call encode_eol. Handle the case of last block here. (shrink_decoding_region, shrink_encoding_region): Detect cases that we can't skip data more rigidly. (code_convert_region): Setup src_multibyte and dst_multibyte members of coding. For decoding, if the buffer is multibyte, convert the source sequence to unibyte in advance. For encoding, if the buffer is multibyte, convert the resulting sequence to multibyte afterward. (run_pre_post_conversion_on_str): New function. (code_convert_string): Deleted and divided into the following two. (decode_coding_string, encode_coding_string): New functions. (code_convert_string1, code_convert_string_norecord): Call one of above. (Fdecode_sjis_char, Fdecode_big5_char): Use MAKE_CHAR instead of MAKE_NON_ASCII_CHAR. (Fset_terminal_coding_system_internal, Fset_safe_terminal_coding_system_internal): Setup src_multibyte and dst_multibyte members. (init_coding_once): Initialize iso_code_class with new enum ISO_control_0 and ISO_control_1.
author Kenichi Handa <handa@m17n.org>
date Fri, 19 May 2000 23:54:56 +0000
parents 01292435daaf
children 176708661b08
comparison
equal deleted inserted replaced
29004:383e4e21306a 29005:b396df3a5181
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */ 20 Boston, MA 02111-1307, USA. */
21 21
22 /*** TABLE OF CONTENTS *** 22 /*** TABLE OF CONTENTS ***
23 23
24 0. General comments
24 1. Preamble 25 1. Preamble
25 2. Emacs' internal format (emacs-mule) handlers 26 2. Emacs' internal format (emacs-mule) handlers
26 3. ISO2022 handlers 27 3. ISO2022 handlers
27 4. Shift-JIS and BIG5 handlers 28 4. Shift-JIS and BIG5 handlers
28 5. CCL handlers 29 5. CCL handlers
31 8. Emacs Lisp library functions 32 8. Emacs Lisp library functions
32 9. Post-amble 33 9. Post-amble
33 34
34 */ 35 */
35 36
37 /*** 0. General comments ***/
38
39
36 /*** GENERAL NOTE on CODING SYSTEM *** 40 /*** GENERAL NOTE on CODING SYSTEM ***
37 41
38 Coding system is an encoding mechanism of one or more character 42 Coding system is an encoding mechanism of one or more character
39 sets. Here's a list of coding systems which Emacs can handle. When 43 sets. Here's a list of coding systems which Emacs can handle. When
40 we say "decode", it means converting some other coding system to 44 we say "decode", it means converting some other coding system to
118 } 122 }
119 #endif 123 #endif
120 124
121 /*** GENERAL NOTES on `decode_coding_XXX ()' functions *** 125 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
122 126
123 These functions decode SRC_BYTES length text at SOURCE encoded in 127 These functions decode SRC_BYTES length of unibyte text at SOURCE
124 CODING to Emacs' internal format (emacs-mule). The resulting text 128 encoded in CODING to Emacs' internal format. The resulting
125 goes to a place pointed to by DESTINATION, the length of which 129 multibyte text goes to a place pointed to by DESTINATION, the length
126 should not exceed DST_BYTES. These functions set the information of 130 of which should not exceed DST_BYTES.
127 original and decoded texts in the members produced, produced_char, 131
128 consumed, and consumed_char of the structure *CODING. 132 These functions set the information of original and decoded texts in
129 133 the members produced, produced_char, consumed, and consumed_char of
130 The return value is an integer (CODING_FINISH_XXX) indicating how 134 the structure *CODING. They also set the member result to one of
131 the decoding finished. 135 CODING_FINISH_XXX indicating how the decoding finished.
132 136
133 DST_BYTES zero means that source area and destination area are 137 DST_BYTES zero means that source area and destination area are
134 overlapped, which means that we can produce a decoded text until it 138 overlapped, which means that we can produce a decoded text until it
135 reaches at the head of not-yet-decoded source text. 139 reaches at the head of not-yet-decoded source text.
136 140
137 Below is a template of these functions. */ 141 Below is a template of these functions. */
138 #if 0 142 #if 0
143 static void
139 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) 144 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
140 struct coding_system *coding; 145 struct coding_system *coding;
141 unsigned char *source, *destination; 146 unsigned char *source, *destination;
142 int src_bytes, dst_bytes; 147 int src_bytes, dst_bytes;
143 { 148 {
146 #endif 151 #endif
147 152
148 /*** GENERAL NOTES on `encode_coding_XXX ()' functions *** 153 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
149 154
150 These functions encode SRC_BYTES length text at SOURCE of Emacs' 155 These functions encode SRC_BYTES length text at SOURCE of Emacs'
151 internal format (emacs-mule) to CODING. The resulting text goes to 156 internal multibyte format to CODING. The resulting unibyte text
152 a place pointed to by DESTINATION, the length of which should not 157 goes to a place pointed to by DESTINATION, the length of which
153 exceed DST_BYTES. These functions set the information of 158 should not exceed DST_BYTES.
154 original and encoded texts in the members produced, produced_char, 159
155 consumed, and consumed_char of the structure *CODING. 160 These functions set the information of original and encoded texts in
156 161 the members produced, produced_char, consumed, and consumed_char of
157 The return value is an integer (CODING_FINISH_XXX) indicating how 162 the structure *CODING. They also set the member result to one of
158 the encoding finished. 163 CODING_FINISH_XXX indicating how the encoding finished.
159 164
160 DST_BYTES zero means that source area and destination area are 165 DST_BYTES zero means that source area and destination area are
161 overlapped, which means that we can produce a decoded text until it 166 overlapped, which means that we can produce an encoded text until it
162 reaches at the head of not-yet-decoded source text. 167 reaches at the head of not-yet-encoded source text.
163 168
164 Below is a template of these functions. */ 169 Below is a template of these functions. */
165 #if 0 170 #if 0
171 static void
166 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) 172 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
167 struct coding_system *coding; 173 struct coding_system *coding;
168 unsigned char *source, *destination; 174 unsigned char *source, *destination;
169 int src_bytes, dst_bytes; 175 int src_bytes, dst_bytes;
170 { 176 {
172 } 178 }
173 #endif 179 #endif
174 180
175 /*** COMMONLY USED MACROS ***/ 181 /*** COMMONLY USED MACROS ***/
176 182
177 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and 183 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
178 THREE_MORE_BYTES safely get one, two, and three bytes from the 184 get one and two bytes from the source text respectively.
179 source text respectively. If there are not enough bytes in the 185 If there are not enough bytes in the source, they jump to
180 source, they jump to `label_end_of_loop'. The caller should set 186 `label_end_of_loop'. The caller should set variables `coding',
181 variables `src' and `src_end' to appropriate areas in advance. */ 187 `src' and `src_end' to appropriate pointers in advance. These
182 188 macros are called from decoding routines `decode_coding_XXX', thus
183 #define ONE_MORE_BYTE(c1) \ 189 it is assumed that the source text is unibyte. */
184 do { \ 190
185 if (src < src_end) \ 191 #define ONE_MORE_BYTE(c1) \
186 c1 = *src++; \ 192 do { \
187 else \ 193 if (src >= src_end) \
188 goto label_end_of_loop; \ 194 { \
195 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
196 goto label_end_of_loop; \
197 } \
198 c1 = *src++; \
189 } while (0) 199 } while (0)
190 200
191 #define TWO_MORE_BYTES(c1, c2) \ 201 #define TWO_MORE_BYTES(c1, c2) \
192 do { \ 202 do { \
193 if (src + 1 < src_end) \ 203 if (src + 1 >= src_end) \
194 c1 = *src++, c2 = *src++; \ 204 { \
195 else \ 205 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
196 goto label_end_of_loop; \ 206 goto label_end_of_loop; \
207 } \
208 c1 = *src++; \
209 c2 = *src++; \
197 } while (0) 210 } while (0)
198 211
199 #define THREE_MORE_BYTES(c1, c2, c3) \ 212
200 do { \ 213 /* Set C to the next character at the source text pointed by `src'.
201 if (src + 2 < src_end) \ 214 If there are not enough characters in the source, jump to
202 c1 = *src++, c2 = *src++, c3 = *src++; \ 215 `label_end_of_loop'. The caller should set variables `coding',
203 else \ 216 `src', `src_end', and `translation_table' to appropriate pointers
204 goto label_end_of_loop; \ 217 in advance. This macro is used in encoding routines
218 `encode_coding_XXX', thus it assumes that the source text is in
219 multibyte form except for 8-bit characters. 8-bit characters are
220 in multibyte form if coding->src_multibyte is nonzero, else they
221 are represented by a single byte. */
222
223 #define ONE_MORE_CHAR(c) \
224 do { \
225 int len = src_end - src; \
226 int bytes; \
227 if (len <= 0) \
228 { \
229 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
230 goto label_end_of_loop; \
231 } \
232 if (coding->src_multibyte \
233 || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes)) \
234 c = STRING_CHAR_AND_LENGTH (src, len, bytes); \
235 else \
236 c = *src, bytes = 1; \
237 if (!NILP (translation_table)) \
238 c = translate_char (translation_table, c, 0, 0, 0); \
239 src += bytes; \
205 } while (0) 240 } while (0)
206 241
207 /* The following three macros DECODE_CHARACTER_ASCII, 242
208 DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put 243 /* Produce a multibyte form of character C to `dst'. Jump to
209 the multi-byte form of a character of each class at the place 244 `label_end_of_loop' if there's not enough space at `dst'.
210 pointed by `dst'. The caller should set the variable `dst' to 245
211 point to an appropriate area and the variable `coding' to point to 246 If we are now in the middle of composition sequence, the decoded
212 the coding-system of the currently decoding text in advance. */ 247 character may be ALTCHAR (for the current composition). In that
213 248 case, the character goes to coding->cmp_data->data instead of
214 /* Decode one ASCII character C. */ 249 `dst'.
215 250
216 #define DECODE_CHARACTER_ASCII(c) \ 251 This macro is used in decoding routines. */
217 do { \ 252
218 *dst++ = (c) & 0x7F; \ 253 #define EMIT_CHAR(c) \
219 coding->produced_char++; \ 254 do { \
255 if (! COMPOSING_P (coding) \
256 || coding->composing == COMPOSITION_RELATIVE \
257 || coding->composing == COMPOSITION_WITH_RULE) \
258 { \
259 int bytes = CHAR_BYTES (c); \
260 if ((dst + bytes) > (dst_bytes ? dst_end : src)) \
261 { \
262 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
263 goto label_end_of_loop; \
264 } \
265 dst += CHAR_STRING (c, dst); \
266 coding->produced_char++; \
267 } \
268 \
269 if (COMPOSING_P (coding) \
270 && coding->composing != COMPOSITION_RELATIVE) \
271 { \
272 CODING_ADD_COMPOSITION_COMPONENT (coding, c); \
273 coding->composition_rule_follows \
274 = coding->composing != COMPOSITION_WITH_ALTCHARS; \
275 } \
220 } while (0) 276 } while (0)
221 277
222 /* Decode one DIMENSION1 character whose charset is CHARSET and whose 278
223 position-code is C. */ 279 #define EMIT_ONE_BYTE(c) \
224 280 do { \
225 #define DECODE_CHARACTER_DIMENSION1(charset, c) \ 281 if (dst >= (dst_bytes ? dst_end : src)) \
226 do { \ 282 { \
227 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ 283 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
228 \ 284 goto label_end_of_loop; \
229 *dst++ = leading_code; \ 285 } \
230 if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) > 0) \ 286 *dst++ = c; \
231 *dst++ = leading_code; \
232 *dst++ = (c) | 0x80; \
233 coding->produced_char++; \
234 } while (0) 287 } while (0)
235 288
236 /* Decode one DIMENSION2 character whose charset is CHARSET and whose 289 #define EMIT_TWO_BYTES(c1, c2) \
237 position-codes are C1 and C2. */ 290 do { \
238 291 if (dst + 2 > (dst_bytes ? dst_end : src)) \
239 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2) \ 292 { \
240 do { \ 293 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
241 DECODE_CHARACTER_DIMENSION1 (charset, c1); \ 294 goto label_end_of_loop; \
242 *dst++ = (c2) | 0x80; \ 295 } \
296 *dst++ = c1, *dst++ = c2; \
297 } while (0)
298
299 #define EMIT_BYTES(from, to) \
300 do { \
301 if (dst + (to - from) > (dst_bytes ? dst_end : src)) \
302 { \
303 coding->result = CODING_FINISH_INSUFFICIENT_DST; \
304 goto label_end_of_loop; \
305 } \
306 while (from < to) \
307 *dst++ = *from++; \
243 } while (0) 308 } while (0)
244 309
245 310
246 /*** 1. Preamble ***/ 311 /*** 1. Preamble ***/
247 312
406 471
407 /*** 2. Emacs internal format (emacs-mule) handlers ***/ 472 /*** 2. Emacs internal format (emacs-mule) handlers ***/
408 473
409 /* Emacs' internal format for encoding multiple character sets is a 474 /* Emacs' internal format for encoding multiple character sets is a
410 kind of multi-byte encoding, i.e. characters are encoded by 475 kind of multi-byte encoding, i.e. characters are encoded by
411 variable-length sequences of one-byte codes. ASCII characters 476 variable-length sequences of one-byte codes.
412 and control characters (e.g. `tab', `newline') are represented by 477
413 one-byte sequences which are their ASCII codes, in the range 0x00 478 ASCII characters and control characters (e.g. `tab', `newline') are
414 through 0x7F. The other characters are represented by a sequence 479 represented by one-byte sequences which are their ASCII codes, in
415 of `base leading-code', optional `extended leading-code', and one 480 the range 0x00 through 0x7F.
416 or two `position-code's. The length of the sequence is determined 481
417 by the base leading-code. Leading-code takes the range 0x80 482 8-bit characters of the range 0x80..0x9F are represented by
418 through 0x9F, whereas extended leading-code and position-code take 483 two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
419 the range 0xA0 through 0xFF. See `charset.h' for more details 484 code + 0x20).
420 about leading-code and position-code. 485
486 8-bit characters of the range 0xA0..0xFF are represented by
487 one-byte sequences which are their 8-bit code.
488
489 The other characters are represented by a sequence of `base
490 leading-code', optional `extended leading-code', and one or two
491 `position-code's. The length of the sequence is determined by the
492 base leading-code. Leading-code takes the range 0x80 through 0x9F,
493 whereas extended leading-code and position-code take the range 0xA0
494 through 0xFF. See `charset.h' for more details about leading-code
495 and position-code.
421 496
422 --- CODE RANGE of Emacs' internal format --- 497 --- CODE RANGE of Emacs' internal format ---
423 (character set) (range) 498 character set range
424 ASCII 0x00 .. 0x7F 499 ------------- -----
425 ELSE (1st byte) 0x81 .. 0x9F 500 ascii 0x00..0x7F
426 (rest bytes) 0xA0 .. 0xFF 501 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
502 eight-bit-graphic 0xA0..0xFF
503 ELSE 0x81..0x9F + [0xA0..0xFF]+
427 --------------------------------------------- 504 ---------------------------------------------
428 505
429 */ 506 */
430 507
431 enum emacs_code_class_type emacs_code_class[256]; 508 enum emacs_code_class_type emacs_code_class[256];
432
433 /* Go to the next statement only if *SRC is accessible and the code is
434 greater than 0xA0. */
435 #define CHECK_CODE_RANGE_A0_FF \
436 do { \
437 if (src >= src_end) \
438 goto label_end_of_switch; \
439 else if (*src++ < 0xA0) \
440 return 0; \
441 } while (0)
442 509
443 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 510 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
444 Check if a text is encoded in Emacs' internal format. If it is, 511 Check if a text is encoded in Emacs' internal format. If it is,
445 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ 512 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
446 513
447 int 514 int
448 detect_coding_emacs_mule (src, src_end) 515 detect_coding_emacs_mule (src, src_end)
449 unsigned char *src, *src_end; 516 unsigned char *src, *src_end;
450 { 517 {
451 unsigned char c; 518 unsigned char c;
452 int composing = 0; 519 int composing = 0;
453 520 /* Dummy for ONE_MORE_BYTE. */
454 while (src < src_end) 521 struct coding_system dummy_coding;
455 { 522 struct coding_system *coding = &dummy_coding;
456 c = *src++; 523
524 while (1)
525 {
526 ONE_MORE_BYTE (c);
457 527
458 if (composing) 528 if (composing)
459 { 529 {
460 if (c < 0xA0) 530 if (c < 0xA0)
461 composing = 0; 531 composing = 0;
532 else if (c == 0xA0)
533 {
534 ONE_MORE_BYTE (c);
535 c &= 0x7F;
536 }
462 else 537 else
463 c -= 0x20; 538 c -= 0x20;
464 } 539 }
465 540
466 switch (emacs_code_class[c]) 541 if (c < 0x20)
467 { 542 {
468 case EMACS_ascii_code:
469 case EMACS_linefeed_code:
470 break;
471
472 case EMACS_control_code:
473 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 543 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
474 return 0; 544 return 0;
545 }
546 else if (c >= 0x80 && c < 0xA0)
547 {
548 if (c == 0x80)
549 /* Old leading code for a composite character. */
550 composing = 1;
551 else
552 {
553 unsigned char *src_base = src - 1;
554 int bytes;
555
556 if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
557 bytes))
558 return 0;
559 src = src_base + bytes;
560 }
561 }
562 }
563 label_end_of_loop:
564 return CODING_CATEGORY_MASK_EMACS_MULE;
565 }
566
567
568 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
569
570 static void
571 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
572 struct coding_system *coding;
573 unsigned char *source, *destination;
574 int src_bytes, dst_bytes;
575 {
576 unsigned char *src = source;
577 unsigned char *src_end = source + src_bytes;
578 unsigned char *dst = destination;
579 unsigned char *dst_end = destination + dst_bytes;
580 /* SRC_BASE remembers the start position in source in each loop.
581 The loop will be exited when there's not enough source code, or
582 when there's not enough destination area to produce a
583 character. */
584 unsigned char *src_base;
585
586 coding->produced_char = 0;
587 while (src < src_end)
588 {
589 unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
590 int bytes;
591
592 src_base = src;
593 if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
594 {
595 p = src;
596 src += bytes;
597 }
598 else
599 {
600 bytes = CHAR_STRING (*src, tmp);
601 p = tmp;
602 src++;
603 }
604 if (dst + bytes >= (dst_bytes ? dst_end : src))
605 {
606 coding->result = CODING_FINISH_INSUFFICIENT_DST;
475 break; 607 break;
476 608 }
477 case EMACS_invalid_code: 609 while (bytes--) *dst++ = *p++;
478 return 0; 610 coding->produced_char++;
479 611 }
480 case EMACS_leading_code_4: 612 coding->consumed = coding->consumed_char = src_base - source;
481 CHECK_CODE_RANGE_A0_FF; 613 coding->produced = dst - destination;
482 /* fall down to check it two more times ... */ 614 }
483 615
484 case EMACS_leading_code_3: 616 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
485 CHECK_CODE_RANGE_A0_FF; 617 encode_eol (coding, source, destination, src_bytes, dst_bytes)
486 /* fall down to check it one more time ... */ 618
487
488 case EMACS_leading_code_2:
489 CHECK_CODE_RANGE_A0_FF;
490 break;
491
492 case 0x80: /* Old leading code for a composite character. */
493 if (composing)
494 CHECK_CODE_RANGE_A0_FF;
495 else
496 composing = 1;
497 break;
498
499 default:
500 label_end_of_switch:
501 break;
502 }
503 }
504 return CODING_CATEGORY_MASK_EMACS_MULE;
505 }
506 619
507 620
508 /*** 3. ISO2022 handlers ***/ 621 /*** 3. ISO2022 handlers ***/
509 622
510 /* The following note describes the coding system ISO2022 briefly. 623 /* The following note describes the coding system ISO2022 briefly.
516 ISO2022 provides many mechanisms to encode several character sets 629 ISO2022 provides many mechanisms to encode several character sets
517 in 7-bit and 8-bit environments. For 7-bit environments, all text 630 in 7-bit and 8-bit environments. For 7-bit environments, all text
518 is encoded using bytes less than 128. This may make the encoded 631 is encoded using bytes less than 128. This may make the encoded
519 text a little bit longer, but the text passes more easily through 632 text a little bit longer, but the text passes more easily through
520 several gateways, some of which strip off MSB (Most Significant Bit). 633 several gateways, some of which strip off MSB (Most Significant Bit).
521 634
522 There are two kinds of character sets: control character set and 635 There are two kinds of character sets: control character set and
523 graphic character set. The former contains control characters such 636 graphic character set. The former contains control characters such
524 as `newline' and `escape' to provide control functions (control 637 as `newline' and `escape' to provide control functions (control
525 functions are also provided by escape sequences). The latter 638 functions are also provided by escape sequences). The latter
526 contains graphic characters such as 'A' and '-'. Emacs recognizes 639 contains graphic characters such as 'A' and '-'. Emacs recognizes
658 o ESC '0' -- start relative composition 771 o ESC '0' -- start relative composition
659 o ESC '1' -- end composition 772 o ESC '1' -- end composition
660 o ESC '2' -- start rule-base composition (*) 773 o ESC '2' -- start rule-base composition (*)
661 o ESC '3' -- start relative composition with alternate chars (**) 774 o ESC '3' -- start relative composition with alternate chars (**)
662 o ESC '4' -- start rule-base composition with alternate chars (**) 775 o ESC '4' -- start rule-base composition with alternate chars (**)
663 Since these are not standard escape sequences of any ISO standard, 776 Since these are not standard escape sequences of any ISO standard,
664 the use of them for these meanings is restricted to Emacs only. 777 the use of them for these meanings is restricted to Emacs only.
665 778
666 (*) This form is used only in Emacs 20.5 and the older versions, 779 (*) This form is used only in Emacs 20.5 and the older versions,
667 but the newer versions can safely decode it. 780 but the newer versions can safely decode it.
668 (**) This form is used only in Emacs 21.1 and the newer versions, 781 (**) This form is used only in Emacs 21.1 and the newer versions,
669 and the older versions can't decode it. 782 and the older versions can't decode it.
670 783
671 Here's a list of example usages of these composition escape 784 Here's a list of example usages of these composition escape
672 sequences (categorized by `enum composition_method'). 785 sequences (categorized by `enum composition_method').
673 786
674 COMPOSITION_RELATIVE: 787 COMPOSITION_RELATIVE:
675 ESC 0 CHAR [ CHAR ] ESC 1 788 ESC 0 CHAR [ CHAR ] ESC 1
676 COMPOSITION_WITH_RULE: 789 COMPOSITION_WITH_RULE:
677 ESC 2 CHAR [ RULE CHAR ] ESC 1 790 ESC 2 CHAR [ RULE CHAR ] ESC 1
678 COMPOSITION_WITH_ALTCHARS: 791 COMPOSITION_WITH_ALTCHARS:
679 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 792 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
680 COMPOSITION_WITH_RULE_ALTCHARS: 793 COMPOSITION_WITH_RULE_ALTCHARS:
681 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ 794 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
682 795
683 enum iso_code_class_type iso_code_class[256]; 796 enum iso_code_class_type iso_code_class[256];
684 797
685 #define CHARSET_OK(idx, charset) \ 798 #define CHARSET_OK(idx, charset) \
710 { 823 {
711 int mask = CODING_CATEGORY_MASK_ISO; 824 int mask = CODING_CATEGORY_MASK_ISO;
712 int mask_found = 0; 825 int mask_found = 0;
713 int reg[4], shift_out = 0, single_shifting = 0; 826 int reg[4], shift_out = 0, single_shifting = 0;
714 int c, c1, i, charset; 827 int c, c1, i, charset;
828 /* Dummy for ONE_MORE_BYTE. */
829 struct coding_system dummy_coding;
830 struct coding_system *coding = &dummy_coding;
715 831
716 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 832 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
717 while (mask && src < src_end) 833 while (mask && src < src_end)
718 { 834 {
719 c = *src++; 835 ONE_MORE_BYTE (c);
720 switch (c) 836 switch (c)
721 { 837 {
722 case ISO_CODE_ESC: 838 case ISO_CODE_ESC:
723 single_shifting = 0; 839 single_shifting = 0;
724 if (src >= src_end) 840 ONE_MORE_BYTE (c);
725 break;
726 c = *src++;
727 if (c >= '(' && c <= '/') 841 if (c >= '(' && c <= '/')
728 { 842 {
729 /* Designation sequence for a charset of dimension 1. */ 843 /* Designation sequence for a charset of dimension 1. */
730 if (src >= src_end) 844 ONE_MORE_BYTE (c1);
731 break;
732 c1 = *src++;
733 if (c1 < ' ' || c1 >= 0x80 845 if (c1 < ' ' || c1 >= 0x80
734 || (charset = iso_charset_table[0][c >= ','][c1]) < 0) 846 || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
735 /* Invalid designation sequence. Just ignore. */ 847 /* Invalid designation sequence. Just ignore. */
736 break; 848 break;
737 reg[(c - '(') % 4] = charset; 849 reg[(c - '(') % 4] = charset;
738 } 850 }
739 else if (c == '$') 851 else if (c == '$')
740 { 852 {
741 /* Designation sequence for a charset of dimension 2. */ 853 /* Designation sequence for a charset of dimension 2. */
742 if (src >= src_end) 854 ONE_MORE_BYTE (c);
743 break;
744 c = *src++;
745 if (c >= '@' && c <= 'B') 855 if (c >= '@' && c <= 'B')
746 /* Designation for JISX0208.1978, GB2312, or JISX0208. */ 856 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
747 reg[0] = charset = iso_charset_table[1][0][c]; 857 reg[0] = charset = iso_charset_table[1][0][c];
748 else if (c >= '(' && c <= '/') 858 else if (c >= '(' && c <= '/')
749 { 859 {
750 if (src >= src_end) 860 ONE_MORE_BYTE (c1);
751 break;
752 c1 = *src++;
753 if (c1 < ' ' || c1 >= 0x80 861 if (c1 < ' ' || c1 >= 0x80
754 || (charset = iso_charset_table[1][c >= ','][c1]) < 0) 862 || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
755 /* Invalid designation sequence. Just ignore. */ 863 /* Invalid designation sequence. Just ignore. */
756 break; 864 break;
757 reg[(c - '(') % 4] = charset; 865 reg[(c - '(') % 4] = charset;
886 mask_found |= CODING_CATEGORY_MASK_ISO_8_1; 994 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
887 /* Check the length of succeeding codes of the range 995 /* Check the length of succeeding codes of the range
888 0xA0..0xFF. If the byte length is odd, we exclude 996 0xA0..0xFF. If the byte length is odd, we exclude
889 CODING_CATEGORY_MASK_ISO_8_2. We can check this only 997 CODING_CATEGORY_MASK_ISO_8_2. We can check this only
890 when we are not single shifting. */ 998 when we are not single shifting. */
891 if (!single_shifting) 999 if (!single_shifting
1000 && mask & CODING_CATEGORY_MASK_ISO_8_2)
892 { 1001 {
893 while (src < src_end && *src >= 0xA0) 1002 int i = 0;
894 src++; 1003 while (src < src_end)
895 if ((src - src_begin - 1) & 1 && src < src_end) 1004 {
1005 ONE_MORE_BYTE (c);
1006 if (c < 0xA0)
1007 break;
1008 i++;
1009 }
1010
1011 if (i & 1 && src < src_end)
896 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; 1012 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
897 else 1013 else
898 mask_found |= CODING_CATEGORY_MASK_ISO_8_2; 1014 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
899 } 1015 }
900 } 1016 }
901 break; 1017 break;
902 } 1018 }
903 } 1019 }
904 1020 label_end_of_loop:
905 return (mask & mask_found); 1021 return (mask & mask_found);
906 } 1022 }
907 1023
908 /* Decode a character of which charset is CHARSET and the 1st position 1024 /* Decode a character of which charset is CHARSET, the 1st position
909 code is C1. If dimension of CHARSET is 2, the 2nd position code is 1025 code is C1, the 2nd position code is C2, and return the decoded
910 fetched from SRC and set to C2. If CHARSET is negative, it means 1026 character code. If the variable `translation_table' is non-nil,
911 that we are decoding ill formed text, and what we can do is just to 1027 return the translated code. */
912 read C1 as is. 1028
913 1029 #define DECODE_ISO_CHARACTER(charset, c1, c2) \
914 If we are now in the middle of composition sequence, the decoded 1030 (NILP (translation_table) \
915 character may be ALTCHAR (see the comment above). In that case, 1031 ? MAKE_CHAR (charset, c1, c2) \
916 the character goes to coding->cmp_data->data instead of DST. */ 1032 : translate_char (translation_table, -1, charset, c1, c2))
917
918 #define DECODE_ISO_CHARACTER(charset, c1) \
919 do { \
920 int c_alt = -1, charset_alt = (charset); \
921 if (charset_alt >= 0) \
922 { \
923 if (CHARSET_DIMENSION (charset_alt) == 2) \
924 { \
925 ONE_MORE_BYTE (c2); \
926 if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \
927 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \
928 { \
929 src--; \
930 charset_alt = CHARSET_ASCII; \
931 } \
932 } \
933 if (!NILP (translation_table) \
934 && ((c_alt = translate_char (translation_table, \
935 -1, charset_alt, c1, c2)) >= 0)) \
936 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
937 } \
938 if (! COMPOSING_P (coding) \
939 || coding->composing == COMPOSITION_RELATIVE \
940 || coding->composing == COMPOSITION_WITH_RULE) \
941 { \
942 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
943 DECODE_CHARACTER_ASCII (c1); \
944 else if (CHARSET_DIMENSION (charset_alt) == 1) \
945 DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
946 else \
947 DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
948 } \
949 if (COMPOSING_P (coding) \
950 && coding->composing != COMPOSITION_RELATIVE) \
951 { \
952 if (c_alt < 0) \
953 c_alt = MAKE_CHAR (charset_alt, c1, c2); \
954 CODING_ADD_COMPOSITION_COMPONENT (coding, c_alt); \
955 coding->composition_rule_follows \
956 = coding->composing != COMPOSITION_WITH_ALTCHARS; \
957 } \
958 } while (0)
959 1033
960 /* Set designation state into CODING. */ 1034 /* Set designation state into CODING. */
961 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ 1035 #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
962 do { \ 1036 do { \
963 int charset; \ 1037 int charset; \
1062 loop again. We can't allocate memory here directly because \ 1136 loop again. We can't allocate memory here directly because \
1063 it may cause buffer/string relocation. */ \ 1137 it may cause buffer/string relocation. */ \
1064 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \ 1138 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1065 >= COMPOSITION_DATA_SIZE) \ 1139 >= COMPOSITION_DATA_SIZE) \
1066 { \ 1140 { \
1067 result = CODING_FINISH_INSUFFICIENT_CMP; \ 1141 coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
1068 goto label_end_of_loop_2; \ 1142 goto label_end_of_loop; \
1069 } \ 1143 } \
1070 coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \ 1144 coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
1071 : c1 == '2' ? COMPOSITION_WITH_RULE \ 1145 : c1 == '2' ? COMPOSITION_WITH_RULE \
1072 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \ 1146 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
1073 : COMPOSITION_WITH_RULE_ALTCHARS); \ 1147 : COMPOSITION_WITH_RULE_ALTCHARS); \
1120 int nref = (c1) % 9; \ 1194 int nref = (c1) % 9; \
1121 if (gref == 4) gref = 10; \ 1195 if (gref == 4) gref = 10; \
1122 if (nref == 4) nref = 10; \ 1196 if (nref == 4) nref = 10; \
1123 rule = COMPOSITION_ENCODE_RULE (gref, nref); \ 1197 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
1124 } \ 1198 } \
1125 else if (c1 < 93) /* new format (after ver.21 */ \ 1199 else if (c1 < 93) /* new format (after ver.21) */ \
1126 { \ 1200 { \
1127 ONE_MORE_BYTE (c2); \ 1201 ONE_MORE_BYTE (c2); \
1128 rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \ 1202 rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
1129 } \ 1203 } \
1130 CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \ 1204 CODING_ADD_COMPOSITION_COMPONENT (coding, rule); \
1132 } while (0) 1206 } while (0)
1133 1207
1134 1208
1135 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 1209 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
1136 1210
1137 int 1211 static void
1138 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) 1212 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1139 struct coding_system *coding; 1213 struct coding_system *coding;
1140 unsigned char *source, *destination; 1214 unsigned char *source, *destination;
1141 int src_bytes, dst_bytes; 1215 int src_bytes, dst_bytes;
1142 { 1216 {
1143 unsigned char *src = source; 1217 unsigned char *src = source;
1144 unsigned char *src_end = source + src_bytes; 1218 unsigned char *src_end = source + src_bytes;
1145 unsigned char *dst = destination; 1219 unsigned char *dst = destination;
1146 unsigned char *dst_end = destination + dst_bytes; 1220 unsigned char *dst_end = destination + dst_bytes;
1147 /* Since the maximum bytes produced by each loop is 7, we subtract 6
1148 from DST_END to assure that overflow checking is necessary only
1149 at the head of loop. */
1150 unsigned char *adjusted_dst_end = dst_end - 6;
1151 int charset;
1152 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 1221 /* Charsets invoked to graphic plane 0 and 1 respectively. */
1153 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1222 int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1154 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); 1223 int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1155 Lisp_Object translation_table 1224 /* SRC_BASE remembers the start position in source in each loop.
1156 = coding->translation_table_for_decode; 1225 The loop will be exited when there's not enough source text
1157 int result = CODING_FINISH_NORMAL; 1226 (within macro ONE_MORE_BYTE), or when there's not enough
1158 1227 destination area to produce a character (within macro
1159 if (!NILP (Venable_character_translation) && NILP (translation_table)) 1228 EMIT_CHAR). */
1160 translation_table = Vstandard_translation_table_for_decode; 1229 unsigned char *src_base;
1161 1230 int c, charset;
1162 coding->produced_char = 0; 1231 Lisp_Object translation_table;
1163 coding->fake_multibyte = 0; 1232
1164 while (src < src_end && (dst_bytes 1233 if (NILP (Venable_character_translation))
1165 ? (dst < adjusted_dst_end) 1234 translation_table = Qnil;
1166 : (dst < src - 6))) 1235 else
1167 { 1236 {
1168 /* SRC_BASE remembers the start position in source in each loop. 1237 translation_table = coding->translation_table_for_decode;
1169 The loop will be exited when there's not enough source text 1238 if (NILP (translation_table))
1170 to analyze long escape sequence or 2-byte code (within macros 1239 translation_table = Vstandard_translation_table_for_decode;
1171 ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset 1240 }
1172 to SRC_BASE before exiting. */ 1241
1173 unsigned char *src_base = src; 1242 coding->result = CODING_FINISH_NORMAL;
1174 int c1 = *src++, c2; 1243
1244 while (1)
1245 {
1246 int c1, c2;
1247
1248 src_base = src;
1249 ONE_MORE_BYTE (c1);
1175 1250
1176 /* We produce no character or one character. */ 1251 /* We produce no character or one character. */
1177 switch (iso_code_class [c1]) 1252 switch (iso_code_class [c1])
1178 { 1253 {
1179 case ISO_0x20_or_0x7F: 1254 case ISO_0x20_or_0x7F:
1180 if (COMPOSING_P (coding) && coding->composition_rule_follows) 1255 if (COMPOSING_P (coding) && coding->composition_rule_follows)
1181 { 1256 {
1182 DECODE_COMPOSITION_RULE (c1); 1257 DECODE_COMPOSITION_RULE (c1);
1183 break; 1258 continue;
1184 } 1259 }
1185 if (charset0 < 0 || CHARSET_CHARS (charset0) == 94) 1260 if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1186 { 1261 {
1187 /* This is SPACE or DEL. */ 1262 /* This is SPACE or DEL. */
1188 *dst++ = c1; 1263 charset = CHARSET_ASCII;
1189 coding->produced_char++;
1190 break; 1264 break;
1191 } 1265 }
1192 /* This is a graphic character, we fall down ... */ 1266 /* This is a graphic character, we fall down ... */
1193 1267
1194 case ISO_graphic_plane_0: 1268 case ISO_graphic_plane_0:
1195 if (COMPOSING_P (coding) && coding->composition_rule_follows) 1269 if (COMPOSING_P (coding) && coding->composition_rule_follows)
1196 DECODE_COMPOSITION_RULE (c1); 1270 {
1197 else 1271 DECODE_COMPOSITION_RULE (c1);
1198 DECODE_ISO_CHARACTER (charset0, c1); 1272 continue;
1273 }
1274 charset = charset0;
1199 break; 1275 break;
1200 1276
1201 case ISO_0xA0_or_0xFF: 1277 case ISO_0xA0_or_0xFF:
1202 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 1278 if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1203 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) 1279 || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1204 goto label_invalid_code; 1280 goto label_invalid_code;
1205 /* This is a graphic character, we fall down ... */ 1281 /* This is a graphic character, we fall down ... */
1206 1282
1207 case ISO_graphic_plane_1: 1283 case ISO_graphic_plane_1:
1208 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) 1284 if (charset1 < 0)
1209 goto label_invalid_code; 1285 goto label_invalid_code;
1210 DECODE_ISO_CHARACTER (charset1, c1); 1286 charset = charset1;
1211 break; 1287 break;
1212 1288
1213 case ISO_control_code: 1289 case ISO_control_0:
1214 if (COMPOSING_P (coding)) 1290 if (COMPOSING_P (coding))
1215 DECODE_COMPOSITION_END ('1'); 1291 DECODE_COMPOSITION_END ('1');
1216 1292
1217 /* All ISO2022 control characters in this class have the 1293 /* All ISO2022 control characters in this class have the
1218 same representation in Emacs internal format. */ 1294 same representation in Emacs internal format. */
1219 if (c1 == '\n' 1295 if (c1 == '\n'
1220 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) 1296 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1221 && (coding->eol_type == CODING_EOL_CR 1297 && (coding->eol_type == CODING_EOL_CR
1222 || coding->eol_type == CODING_EOL_CRLF)) 1298 || coding->eol_type == CODING_EOL_CRLF))
1223 { 1299 {
1224 result = CODING_FINISH_INCONSISTENT_EOL; 1300 coding->result = CODING_FINISH_INCONSISTENT_EOL;
1225 goto label_end_of_loop_2; 1301 goto label_end_of_loop;
1226 } 1302 }
1227 *dst++ = c1; 1303 charset = CHARSET_ASCII;
1228 coding->produced_char++;
1229 break; 1304 break;
1305
1306 case ISO_control_1:
1307 if (COMPOSING_P (coding))
1308 DECODE_COMPOSITION_END ('1');
1309 goto label_invalid_code;
1230 1310
1231 case ISO_carriage_return: 1311 case ISO_carriage_return:
1232 if (COMPOSING_P (coding)) 1312 if (COMPOSING_P (coding))
1233 DECODE_COMPOSITION_END ('1'); 1313 DECODE_COMPOSITION_END ('1');
1234 1314
1235 if (coding->eol_type == CODING_EOL_CR) 1315 if (coding->eol_type == CODING_EOL_CR)
1236 *dst++ = '\n'; 1316 c1 = '\n';
1237 else if (coding->eol_type == CODING_EOL_CRLF) 1317 else if (coding->eol_type == CODING_EOL_CRLF)
1238 { 1318 {
1239 ONE_MORE_BYTE (c1); 1319 ONE_MORE_BYTE (c1);
1240 if (c1 == ISO_CODE_LF) 1320 if (c1 != ISO_CODE_LF)
1241 *dst++ = '\n';
1242 else
1243 { 1321 {
1244 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) 1322 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1245 { 1323 {
1246 result = CODING_FINISH_INCONSISTENT_EOL; 1324 coding->result = CODING_FINISH_INCONSISTENT_EOL;
1247 goto label_end_of_loop_2; 1325 goto label_end_of_loop;
1248 } 1326 }
1249 src--; 1327 src--;
1250 *dst++ = '\r'; 1328 c1 = '\r';
1251 } 1329 }
1252 } 1330 }
1253 else 1331 charset = CHARSET_ASCII;
1254 *dst++ = c1;
1255 coding->produced_char++;
1256 break; 1332 break;
1257 1333
1258 case ISO_shift_out: 1334 case ISO_shift_out:
1259 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) 1335 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1260 || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0) 1336 || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1261 goto label_invalid_code; 1337 goto label_invalid_code;
1262 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; 1338 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1263 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1339 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1264 break; 1340 continue;
1265 1341
1266 case ISO_shift_in: 1342 case ISO_shift_in:
1267 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)) 1343 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1268 goto label_invalid_code; 1344 goto label_invalid_code;
1269 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; 1345 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1270 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1346 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1271 break; 1347 continue;
1272 1348
1273 case ISO_single_shift_2_7: 1349 case ISO_single_shift_2_7:
1274 case ISO_single_shift_2: 1350 case ISO_single_shift_2:
1275 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)) 1351 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1276 goto label_invalid_code; 1352 goto label_invalid_code;
1327 ONE_MORE_BYTE (c2); 1403 ONE_MORE_BYTE (c2);
1328 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2); 1404 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1329 } 1405 }
1330 else 1406 else
1331 goto label_invalid_code; 1407 goto label_invalid_code;
1332 break; 1408 /* We must update these variables now. */
1409 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1410 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1411 continue;
1333 1412
1334 case 'n': /* invocation of locking-shift-2 */ 1413 case 'n': /* invocation of locking-shift-2 */
1335 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) 1414 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1336 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) 1415 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1337 goto label_invalid_code; 1416 goto label_invalid_code;
1338 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; 1417 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1339 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1418 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1340 break; 1419 continue;
1341 1420
1342 case 'o': /* invocation of locking-shift-3 */ 1421 case 'o': /* invocation of locking-shift-3 */
1343 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) 1422 if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1344 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) 1423 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1345 goto label_invalid_code; 1424 goto label_invalid_code;
1346 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; 1425 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1347 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1426 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1348 break; 1427 continue;
1349 1428
1350 case 'N': /* invocation of single-shift-2 */ 1429 case 'N': /* invocation of single-shift-2 */
1351 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) 1430 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1352 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) 1431 || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1353 goto label_invalid_code; 1432 goto label_invalid_code;
1433 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1354 ONE_MORE_BYTE (c1); 1434 ONE_MORE_BYTE (c1);
1355 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1356 DECODE_ISO_CHARACTER (charset, c1);
1357 break; 1435 break;
1358 1436
1359 case 'O': /* invocation of single-shift-3 */ 1437 case 'O': /* invocation of single-shift-3 */
1360 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT) 1438 if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1361 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) 1439 || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1362 goto label_invalid_code; 1440 goto label_invalid_code;
1441 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1363 ONE_MORE_BYTE (c1); 1442 ONE_MORE_BYTE (c1);
1364 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1365 DECODE_ISO_CHARACTER (charset, c1);
1366 break; 1443 break;
1367 1444
1368 case '0': case '2': case '3': case '4': /* start composition */ 1445 case '0': case '2': case '3': case '4': /* start composition */
1369 DECODE_COMPOSITION_START (c1); 1446 DECODE_COMPOSITION_START (c1);
1370 break; 1447 continue;
1371 1448
1372 case '1': /* end composition */ 1449 case '1': /* end composition */
1373 DECODE_COMPOSITION_END (c1); 1450 DECODE_COMPOSITION_END (c1);
1374 break; 1451 continue;
1375 1452
1376 case '[': /* specification of direction */ 1453 case '[': /* specification of direction */
1377 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) 1454 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1378 goto label_invalid_code; 1455 goto label_invalid_code;
1379 /* For the moment, nested direction is not supported. 1456 /* For the moment, nested direction is not supported.
1403 break; 1480 break;
1404 1481
1405 default: 1482 default:
1406 goto label_invalid_code; 1483 goto label_invalid_code;
1407 } 1484 }
1408 break; 1485 continue;
1409 1486
1410 default: 1487 default:
1411 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION)) 1488 if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1412 goto label_invalid_code; 1489 goto label_invalid_code;
1413 if (c1 >= 0x28 && c1 <= 0x2B) 1490 if (c1 >= 0x28 && c1 <= 0x2B)
1419 { /* designation of DIMENSION1_CHARS96 character set */ 1496 { /* designation of DIMENSION1_CHARS96 character set */
1420 ONE_MORE_BYTE (c2); 1497 ONE_MORE_BYTE (c2);
1421 DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2); 1498 DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1422 } 1499 }
1423 else 1500 else
1424 { 1501 goto label_invalid_code;
1425 goto label_invalid_code; 1502 /* We must update these variables now. */
1426 } 1503 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1504 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1505 continue;
1427 } 1506 }
1428 /* We must update these variables now. */ 1507 }
1429 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 1508
1430 charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); 1509 /* Now we know CHARSET and 1st position code C1 of a character.
1431 break; 1510 Produce a multibyte sequence for that character while getting
1432 1511 2nd position code C2 if necessary. */
1433 label_invalid_code: 1512 if (CHARSET_DIMENSION (charset) == 2)
1434 if (COMPOSING_P (coding)) 1513 {
1435 DECODE_COMPOSITION_END ('1'); 1514 ONE_MORE_BYTE (c2);
1436 coding->produced_char += src - src_base; 1515 if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
1437 while (src_base < src) 1516 /* C2 is not in a valid range. */
1438 *dst++ = (*src_base++) & 0x7F; 1517 goto label_invalid_code;
1439 } 1518 }
1519 c = DECODE_ISO_CHARACTER (charset, c1, c2);
1520 EMIT_CHAR (c);
1440 continue; 1521 continue;
1441 1522
1442 label_end_of_loop: 1523 label_invalid_code:
1443 result = CODING_FINISH_INSUFFICIENT_SRC; 1524 coding->errors++;
1444 label_end_of_loop_2: 1525 if (COMPOSING_P (coding))
1526 DECODE_COMPOSITION_END ('1');
1445 src = src_base; 1527 src = src_base;
1446 break; 1528 c = *src++;
1447 } 1529 EMIT_CHAR (c);
1448 1530 }
1449 if (src < src_end) 1531
1450 { 1532 label_end_of_loop:
1451 if (result == CODING_FINISH_NORMAL) 1533 coding->consumed = coding->consumed_char = src_base - source;
1452 result = CODING_FINISH_INSUFFICIENT_DST;
1453 else if (result != CODING_FINISH_INCONSISTENT_EOL
1454 && coding->mode & CODING_MODE_LAST_BLOCK)
1455 {
1456 /* This is the last block of the text to be decoded. We had
1457 better just flush out all remaining codes in the text
1458 although they are not valid characters. */
1459 if (COMPOSING_P (coding))
1460 DECODE_COMPOSITION_END ('1');
1461 src_bytes = src_end - src;
1462 if (dst_bytes && (dst_end - dst < src_end - src))
1463 src_end = src + (dst_end - dst);
1464 coding->produced_char += src_end - src;
1465 while (src < src_end)
1466 *dst++ = (*src++) & 0x7F;
1467 }
1468 }
1469
1470 coding->consumed = coding->consumed_char = src - source;
1471 coding->produced = dst - destination; 1534 coding->produced = dst - destination;
1472 return result; 1535 return;
1473 } 1536 }
1537
1474 1538
1475 /* ISO2022 encoding stuff. */ 1539 /* ISO2022 encoding stuff. */
1476 1540
1477 /* 1541 /*
1478 It is not enough to say just "ISO2022" on encoding, we have to 1542 It is not enough to say just "ISO2022" on encoding, we have to
1492 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more 1556 defined by macros CODING_FLAG_ISO_XXX. See `coding.h' for more
1493 details. 1557 details.
1494 */ 1558 */
1495 1559
1496 /* Produce codes (escape sequence) for designating CHARSET to graphic 1560 /* Produce codes (escape sequence) for designating CHARSET to graphic
1497 register REG. If <final-char> of CHARSET is '@', 'A', or 'B' and 1561 register REG at DST, and increment DST. If <final-char> of CHARSET is
1498 the coding system CODING allows, produce designation sequence of 1562 '@', 'A', or 'B' and the coding system CODING allows, produce
1499 short-form. */ 1563 designation sequence of short-form. */
1500 1564
1501 #define ENCODE_DESIGNATION(charset, reg, coding) \ 1565 #define ENCODE_DESIGNATION(charset, reg, coding) \
1502 do { \ 1566 do { \
1503 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \ 1567 unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \
1504 char *intermediate_char_94 = "()*+"; \ 1568 char *intermediate_char_94 = "()*+"; \
1505 char *intermediate_char_96 = ",-./"; \ 1569 char *intermediate_char_96 = ",-./"; \
1506 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \ 1570 int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
1571 \
1507 if (revision < 255) \ 1572 if (revision < 255) \
1508 { \ 1573 { \
1509 *dst++ = ISO_CODE_ESC; \ 1574 *dst++ = ISO_CODE_ESC; \
1510 *dst++ = '&'; \ 1575 *dst++ = '&'; \
1511 *dst++ = '@' + revision; \ 1576 *dst++ = '@' + revision; \
1512 } \ 1577 } \
1513 *dst++ = ISO_CODE_ESC; \ 1578 *dst++ = ISO_CODE_ESC; \
1514 if (CHARSET_DIMENSION (charset) == 1) \ 1579 if (CHARSET_DIMENSION (charset) == 1) \
1515 { \ 1580 { \
1516 if (CHARSET_CHARS (charset) == 94) \ 1581 if (CHARSET_CHARS (charset) == 94) \
1517 *dst++ = (unsigned char) (intermediate_char_94[reg]); \ 1582 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1518 else \ 1583 else \
1521 else \ 1586 else \
1522 { \ 1587 { \
1523 *dst++ = '$'; \ 1588 *dst++ = '$'; \
1524 if (CHARSET_CHARS (charset) == 94) \ 1589 if (CHARSET_CHARS (charset) == 94) \
1525 { \ 1590 { \
1526 if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \ 1591 if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
1527 || reg != 0 \ 1592 || reg != 0 \
1528 || final_char < '@' || final_char > 'B') \ 1593 || final_char < '@' || final_char > 'B') \
1529 *dst++ = (unsigned char) (intermediate_char_94[reg]); \ 1594 *dst++ = (unsigned char) (intermediate_char_94[reg]); \
1530 } \ 1595 } \
1531 else \ 1596 else \
1532 *dst++ = (unsigned char) (intermediate_char_96[reg]); \ 1597 *dst++ = (unsigned char) (intermediate_char_96[reg]); \
1533 } \ 1598 } \
1534 *dst++ = final_char; \ 1599 *dst++ = final_char; \
1535 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \ 1600 CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
1536 } while (0) 1601 } while (0)
1537 1602
1538 /* The following two macros produce codes (control character or escape 1603 /* The following two macros produce codes (control character or escape
1539 sequence) for ISO2022 single-shift functions (single-shift-2 and 1604 sequence) for ISO2022 single-shift functions (single-shift-2 and
1542 #define ENCODE_SINGLE_SHIFT_2 \ 1607 #define ENCODE_SINGLE_SHIFT_2 \
1543 do { \ 1608 do { \
1544 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ 1609 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1545 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ 1610 *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
1546 else \ 1611 else \
1547 { \ 1612 *dst++ = ISO_CODE_SS2; \
1548 *dst++ = ISO_CODE_SS2; \
1549 coding->fake_multibyte = 1; \
1550 } \
1551 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ 1613 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1552 } while (0) 1614 } while (0)
1553 1615
1554 #define ENCODE_SINGLE_SHIFT_3 \ 1616 #define ENCODE_SINGLE_SHIFT_3 \
1555 do { \ 1617 do { \
1556 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ 1618 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
1557 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ 1619 *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
1558 else \ 1620 else \
1559 { \ 1621 *dst++ = ISO_CODE_SS3; \
1560 *dst++ = ISO_CODE_SS3; \
1561 coding->fake_multibyte = 1; \
1562 } \
1563 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ 1622 CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
1564 } while (0) 1623 } while (0)
1565 1624
1566 /* The following four macros produce codes (control character or 1625 /* The following four macros produce codes (control character or
1567 escape sequence) for ISO2022 locking-shift functions (shift-in, 1626 escape sequence) for ISO2022 locking-shift functions (shift-in,
1568 shift-out, locking-shift-2, and locking-shift-3). */ 1627 shift-out, locking-shift-2, and locking-shift-3). */
1569 1628
1570 #define ENCODE_SHIFT_IN \ 1629 #define ENCODE_SHIFT_IN \
1571 do { \ 1630 do { \
1572 *dst++ = ISO_CODE_SI; \ 1631 *dst++ = ISO_CODE_SI; \
1573 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \ 1632 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1574 } while (0) 1633 } while (0)
1575 1634
1576 #define ENCODE_SHIFT_OUT \ 1635 #define ENCODE_SHIFT_OUT \
1577 do { \ 1636 do { \
1578 *dst++ = ISO_CODE_SO; \ 1637 *dst++ = ISO_CODE_SO; \
1579 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \ 1638 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1580 } while (0) 1639 } while (0)
1581 1640
1582 #define ENCODE_LOCKING_SHIFT_2 \ 1641 #define ENCODE_LOCKING_SHIFT_2 \
1583 do { \ 1642 do { \
1584 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \ 1643 *dst++ = ISO_CODE_ESC, *dst++ = 'n'; \
1585 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \ 1644 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1586 } while (0) 1645 } while (0)
1587 1646
1588 #define ENCODE_LOCKING_SHIFT_3 \ 1647 #define ENCODE_LOCKING_SHIFT_3 \
1589 do { \ 1648 do { \
1590 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \ 1649 *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
1591 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \ 1650 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1592 } while (0) 1651 } while (0)
1593 1652
1594 /* Produce codes for a DIMENSION1 character whose character set is 1653 /* Produce codes for a DIMENSION1 character whose character set is
1595 CHARSET and whose position-code is C1. Designation and invocation 1654 CHARSET and whose position-code is C1. Designation and invocation
1596 sequences are also produced in advance if necessary. */ 1655 sequences are also produced in advance if necessary. */
1597
1598 1656
1599 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \ 1657 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
1600 do { \ 1658 do { \
1601 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \ 1659 if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
1602 { \ 1660 { \
1678 dst = encode_invocation_designation (charset, coding, dst); \ 1736 dst = encode_invocation_designation (charset, coding, dst); \
1679 } while (1) 1737 } while (1)
1680 1738
1681 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ 1739 #define ENCODE_ISO_CHARACTER(charset, c1, c2) \
1682 do { \ 1740 do { \
1683 int c_alt, charset_alt; \ 1741 int alt_charset = charset; \
1684 \ 1742 \
1685 if (!NILP (translation_table) \ 1743 if (CHARSET_DEFINED_P (charset)) \
1686 && ((c_alt = translate_char (translation_table, -1, \
1687 charset, c1, c2)) \
1688 >= 0)) \
1689 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
1690 else \
1691 charset_alt = charset; \
1692 if (CHARSET_DEFINED_P (charset_alt)) \
1693 { \ 1744 { \
1694 if (CHARSET_DIMENSION (charset_alt) == 1) \ 1745 if (CHARSET_DIMENSION (charset) == 1) \
1695 { \ 1746 { \
1696 if (charset == CHARSET_ASCII \ 1747 if (charset == CHARSET_ASCII \
1697 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ 1748 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1698 charset_alt = charset_latin_jisx0201; \ 1749 alt_charset = charset_latin_jisx0201; \
1699 ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \ 1750 ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
1700 } \ 1751 } \
1701 else \ 1752 else \
1702 { \ 1753 { \
1703 if (charset == charset_jisx0208 \ 1754 if (charset == charset_jisx0208 \
1704 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ 1755 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1705 charset_alt = charset_jisx0208_1978; \ 1756 alt_charset = charset_jisx0208_1978; \
1706 ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \ 1757 ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
1707 } \ 1758 } \
1708 } \ 1759 } \
1709 else \ 1760 else \
1710 { \ 1761 { \
1711 if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ 1762 *dst++ = c1; \
1712 { \ 1763 if (c2 >= 0) \
1713 *dst++ = charset & 0x7f; \ 1764 *dst++ = c2; \
1714 *dst++ = c1 & 0x7f; \
1715 if (c2) \
1716 *dst++ = c2 & 0x7f; \
1717 } \
1718 else \
1719 { \
1720 *dst++ = charset; \
1721 *dst++ = c1; \
1722 if (c2) \
1723 *dst++ = c2; \
1724 } \
1725 } \ 1765 } \
1726 coding->consumed_char++; \
1727 } while (0) 1766 } while (0)
1728 1767
1729 /* Produce designation and invocation codes at a place pointed by DST 1768 /* Produce designation and invocation codes at a place pointed by DST
1730 to use CHARSET. The element `spec.iso2022' of *CODING is updated. 1769 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1731 Return new DST. */ 1770 Return new DST. */
1784 else 1823 else
1785 ENCODE_LOCKING_SHIFT_3; 1824 ENCODE_LOCKING_SHIFT_3;
1786 break; 1825 break;
1787 } 1826 }
1788 } 1827 }
1828
1789 return dst; 1829 return dst;
1790 } 1830 }
1791 1831
1792 /* Produce 2-byte codes for encoded composition rule RULE. */ 1832 /* Produce 2-byte codes for encoded composition rule RULE. */
1793 1833
1847 coding->composing = COMPOSITION_RELATIVE; \ 1887 coding->composing = COMPOSITION_RELATIVE; \
1848 } while (0) 1888 } while (0)
1849 1889
1850 /* The following three macros produce codes for indicating direction 1890 /* The following three macros produce codes for indicating direction
1851 of text. */ 1891 of text. */
1852 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \ 1892 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
1853 do { \ 1893 do { \
1854 if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \ 1894 if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \
1855 *dst++ = ISO_CODE_ESC, *dst++ = '['; \ 1895 *dst++ = ISO_CODE_ESC, *dst++ = '['; \
1856 else \ 1896 else \
1857 *dst++ = ISO_CODE_CSI; \ 1897 *dst++ = ISO_CODE_CSI; \
1858 } while (0) 1898 } while (0)
1859 1899
1860 #define ENCODE_DIRECTION_R2L \ 1900 #define ENCODE_DIRECTION_R2L \
1861 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']' 1901 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '2', *dst++ = ']'
1862 1902
1863 #define ENCODE_DIRECTION_L2R \ 1903 #define ENCODE_DIRECTION_L2R \
1864 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']' 1904 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '0', *dst++ = ']'
1865 1905
1866 /* Produce codes for designation and invocation to reset the graphic 1906 /* Produce codes for designation and invocation to reset the graphic
1867 planes and registers to initial state. */ 1907 planes and registers to initial state. */
1868 #define ENCODE_RESET_PLANE_AND_REGISTER \ 1908 #define ENCODE_RESET_PLANE_AND_REGISTER \
1869 do { \ 1909 do { \
1877 ENCODE_DESIGNATION \ 1917 ENCODE_DESIGNATION \
1878 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \ 1918 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1879 } while (0) 1919 } while (0)
1880 1920
1881 /* Produce designation sequences of charsets in the line started from 1921 /* Produce designation sequences of charsets in the line started from
1882 SRC to a place pointed by *DSTP, and update DSTP. 1922 SRC to a place pointed by DST, and return updated DST.
1883 1923
1884 If the current block ends before any end-of-line, we may fail to 1924 If the current block ends before any end-of-line, we may fail to
1885 find all the necessary designations. */ 1925 find all the necessary designations. */
1886 1926
1887 void 1927 static unsigned char *
1888 encode_designation_at_bol (coding, table, src, src_end, dstp) 1928 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
1889 struct coding_system *coding; 1929 struct coding_system *coding;
1890 Lisp_Object table; 1930 Lisp_Object translation_table;
1891 unsigned char *src, *src_end, **dstp; 1931 unsigned char *src, *src_end, *dst;
1892 { 1932 {
1893 int charset, c, found = 0, reg; 1933 int charset, c, found = 0, reg;
1894 /* Table of charsets to be designated to each graphic register. */ 1934 /* Table of charsets to be designated to each graphic register. */
1895 int r[4]; 1935 int r[4];
1896 unsigned char *dst = *dstp;
1897 1936
1898 for (reg = 0; reg < 4; reg++) 1937 for (reg = 0; reg < 4; reg++)
1899 r[reg] = -1; 1938 r[reg] = -1;
1900 1939
1901 while (src < src_end && *src != '\n' && found < 4) 1940 while (found < 4)
1902 { 1941 {
1903 int bytes = BYTES_BY_CHAR_HEAD (*src); 1942 ONE_MORE_CHAR (c);
1943 if (c == '\n')
1944 break;
1904 1945
1905 if (NILP (table)) 1946 charset = CHAR_CHARSET (c);
1906 charset = CHARSET_AT (src);
1907 else
1908 {
1909 int c_alt;
1910 unsigned char c1, c2;
1911
1912 SPLIT_STRING(src, bytes, charset, c1, c2);
1913 if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1914 charset = CHAR_CHARSET (c_alt);
1915 }
1916
1917 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); 1947 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1918 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) 1948 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1919 { 1949 {
1920 found++; 1950 found++;
1921 r[reg] = charset; 1951 r[reg] = charset;
1922 } 1952 }
1923 1953 }
1924 src += bytes; 1954
1925 } 1955 label_end_of_loop:
1926
1927 if (found) 1956 if (found)
1928 { 1957 {
1929 for (reg = 0; reg < 4; reg++) 1958 for (reg = 0; reg < 4; reg++)
1930 if (r[reg] >= 0 1959 if (r[reg] >= 0
1931 && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg]) 1960 && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1932 ENCODE_DESIGNATION (r[reg], reg, coding); 1961 ENCODE_DESIGNATION (r[reg], reg, coding);
1933 *dstp = dst; 1962 }
1934 } 1963
1964 return dst;
1935 } 1965 }
1936 1966
1937 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ 1967 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1938 1968
1939 int 1969 static void
1940 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) 1970 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1941 struct coding_system *coding; 1971 struct coding_system *coding;
1942 unsigned char *source, *destination; 1972 unsigned char *source, *destination;
1943 int src_bytes, dst_bytes; 1973 int src_bytes, dst_bytes;
1944 { 1974 {
1945 unsigned char *src = source; 1975 unsigned char *src = source;
1946 unsigned char *src_end = source + src_bytes; 1976 unsigned char *src_end = source + src_bytes;
1947 unsigned char *dst = destination; 1977 unsigned char *dst = destination;
1948 unsigned char *dst_end = destination + dst_bytes; 1978 unsigned char *dst_end = destination + dst_bytes;
1949 /* Since the maximum bytes produced by each loop is 14, we subtract 13 1979 /* Since the maximum bytes produced by each loop is 20, we subtract 19
1950 from DST_END to assure overflow checking is necessary only at the 1980 from DST_END to assure overflow checking is necessary only at the
1951 head of loop. */ 1981 head of loop. */
1952 unsigned char *adjusted_dst_end = dst_end - 13; 1982 unsigned char *adjusted_dst_end = dst_end - 19;
1953 Lisp_Object translation_table 1983 /* SRC_BASE remembers the start position in source in each loop.
1954 = coding->translation_table_for_encode; 1984 The loop will be exited when there's not enough source text to
1955 int result = CODING_FINISH_NORMAL; 1985 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
1956 1986 there's not enough destination area to produce encoded codes
1957 if (!NILP (Venable_character_translation) && NILP (translation_table)) 1987 (within macro EMIT_BYTES). */
1958 translation_table = Vstandard_translation_table_for_encode; 1988 unsigned char *src_base;
1989 int c;
1990 Lisp_Object translation_table;
1991
1992 if (NILP (Venable_character_translation))
1993 translation_table = Qnil;
1994 else
1995 {
1996 translation_table = coding->translation_table_for_encode;
1997 if (NILP (translation_table))
1998 translation_table = Vstandard_translation_table_for_encode;
1999 }
1959 2000
1960 coding->consumed_char = 0; 2001 coding->consumed_char = 0;
1961 coding->fake_multibyte = 0; 2002 coding->errors = 0;
1962 while (src < src_end && (dst_bytes 2003 while (1)
1963 ? (dst < adjusted_dst_end) 2004 {
1964 : (dst < src - 13))) 2005 int charset, c1, c2;
1965 { 2006
1966 /* SRC_BASE remembers the start position in source in each loop. 2007 src_base = src;
1967 The loop will be exited when there's not enough source text 2008
1968 to analyze multi-byte codes (within macros ONE_MORE_BYTE, 2009 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
1969 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is 2010 {
1970 reset to SRC_BASE before exiting. */ 2011 coding->result = CODING_FINISH_INSUFFICIENT_DST;
1971 unsigned char *src_base = src; 2012 break;
1972 int charset, c1, c2, c3, c4; 2013 }
1973 2014
1974 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL 2015 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1975 && CODING_SPEC_ISO_BOL (coding)) 2016 && CODING_SPEC_ISO_BOL (coding))
1976 { 2017 {
1977 /* We have to produce designation sequences if any now. */ 2018 /* We have to produce designation sequences if any now. */
1978 encode_designation_at_bol (coding, translation_table, 2019 dst = encode_designation_at_bol (coding, translation_table,
1979 src, src_end, &dst); 2020 src, src_end, dst);
1980 CODING_SPEC_ISO_BOL (coding) = 0; 2021 CODING_SPEC_ISO_BOL (coding) = 0;
1981 } 2022 }
1982 2023
1983 /* Check composition start and end. */ 2024 /* Check composition start and end. */
1984 if (coding->composing != COMPOSITION_DISABLED 2025 if (coding->composing != COMPOSITION_DISABLED
2015 } 2056 }
2016 else 2057 else
2017 { 2058 {
2018 SPLIT_CHAR (c, charset, c1, c2); 2059 SPLIT_CHAR (c, charset, c1, c2);
2019 ENCODE_ISO_CHARACTER (charset, c1, c2); 2060 ENCODE_ISO_CHARACTER (charset, c1, c2);
2020 /* But, we didn't consume a character in SRC. */
2021 coding->consumed_char--;
2022 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) 2061 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2023 coding->composition_rule_follows = 1; 2062 coding->composition_rule_follows = 1;
2024 } 2063 }
2025 continue; 2064 continue;
2026 } 2065 }
2033 continue; 2072 continue;
2034 } 2073 }
2035 } 2074 }
2036 } 2075 }
2037 2076
2038 c1 = *src++; 2077 ONE_MORE_CHAR (c);
2039 /* Now encode one character. C1 is a control character, an 2078
2040 ASCII character, or a leading-code of multi-byte character. */ 2079 /* Now encode the character C. */
2041 switch (emacs_code_class[c1]) 2080 if (c < 0x20 || c == 0x7F)
2042 { 2081 {
2043 case EMACS_ascii_code: 2082 if (c == '\r')
2044 c2 = 0; 2083 {
2045 ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2); 2084 if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2046 break; 2085 {
2047 2086 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2048 case EMACS_control_code: 2087 ENCODE_RESET_PLANE_AND_REGISTER;
2049 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) 2088 *dst++ = c;
2050 ENCODE_RESET_PLANE_AND_REGISTER; 2089 continue;
2051 *dst++ = c1; 2090 }
2052 coding->consumed_char++; 2091 /* fall down to treat '\r' as '\n' ... */
2053 break; 2092 c = '\n';
2054 2093 }
2055 case EMACS_carriage_return_code: 2094 if (c == '\n')
2056 if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) 2095 {
2096 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2097 ENCODE_RESET_PLANE_AND_REGISTER;
2098 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2099 bcopy (coding->spec.iso2022.initial_designation,
2100 coding->spec.iso2022.current_designation,
2101 sizeof coding->spec.iso2022.initial_designation);
2102 if (coding->eol_type == CODING_EOL_LF
2103 || coding->eol_type == CODING_EOL_UNDECIDED)
2104 *dst++ = ISO_CODE_LF;
2105 else if (coding->eol_type == CODING_EOL_CRLF)
2106 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2107 else
2108 *dst++ = ISO_CODE_CR;
2109 CODING_SPEC_ISO_BOL (coding) = 1;
2110 }
2111 else
2057 { 2112 {
2058 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) 2113 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2059 ENCODE_RESET_PLANE_AND_REGISTER; 2114 ENCODE_RESET_PLANE_AND_REGISTER;
2060 *dst++ = c1; 2115 *dst++ = c;
2061 coding->consumed_char++;
2062 break;
2063 } 2116 }
2064 /* fall down to treat '\r' as '\n' ... */ 2117 }
2065 2118 else if (ASCII_BYTE_P (c))
2066 case EMACS_linefeed_code: 2119 ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1);
2067 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) 2120 else if (SINGLE_BYTE_CHAR_P (c))
2068 ENCODE_RESET_PLANE_AND_REGISTER; 2121 {
2069 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL) 2122 *dst++ = c;
2070 bcopy (coding->spec.iso2022.initial_designation, 2123 coding->errors++;
2071 coding->spec.iso2022.current_designation, 2124 }
2072 sizeof coding->spec.iso2022.initial_designation); 2125 else
2073 if (coding->eol_type == CODING_EOL_LF 2126 {
2074 || coding->eol_type == CODING_EOL_UNDECIDED) 2127 SPLIT_CHAR (c, charset, c1, c2);
2075 *dst++ = ISO_CODE_LF; 2128 ENCODE_ISO_CHARACTER (charset, c1, c2);
2076 else if (coding->eol_type == CODING_EOL_CRLF) 2129 }
2077 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; 2130
2078 else 2131 coding->consumed_char++;
2079 *dst++ = ISO_CODE_CR; 2132 }
2080 CODING_SPEC_ISO_BOL (coding) = 1; 2133
2081 coding->consumed_char++; 2134 label_end_of_loop:
2082 break; 2135 coding->consumed = src_base - source;
2083
2084 case EMACS_leading_code_2:
2085 ONE_MORE_BYTE (c2);
2086 c3 = 0;
2087 if (c2 < 0xA0)
2088 {
2089 /* invalid sequence */
2090 *dst++ = c1;
2091 src--;
2092 coding->consumed_char++;
2093 }
2094 else
2095 ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
2096 break;
2097
2098 case EMACS_leading_code_3:
2099 TWO_MORE_BYTES (c2, c3);
2100 c4 = 0;
2101 if (c2 < 0xA0 || c3 < 0xA0)
2102 {
2103 /* invalid sequence */
2104 *dst++ = c1;
2105 src -= 2;
2106 coding->consumed_char++;
2107 }
2108 else if (c1 < LEADING_CODE_PRIVATE_11)
2109 ENCODE_ISO_CHARACTER (c1, c2, c3);
2110 else
2111 ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
2112 break;
2113
2114 case EMACS_leading_code_4:
2115 THREE_MORE_BYTES (c2, c3, c4);
2116 if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
2117 {
2118 /* invalid sequence */
2119 *dst++ = c1;
2120 src -= 3;
2121 coding->consumed_char++;
2122 }
2123 else
2124 ENCODE_ISO_CHARACTER (c2, c3, c4);
2125 break;
2126
2127 case EMACS_invalid_code:
2128 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2129 ENCODE_RESET_PLANE_AND_REGISTER;
2130 *dst++ = c1;
2131 coding->consumed_char++;
2132 break;
2133 }
2134 continue;
2135 label_end_of_loop:
2136 result = CODING_FINISH_INSUFFICIENT_SRC;
2137 src = src_base;
2138 break;
2139 }
2140
2141 if (src < src_end && result == CODING_FINISH_NORMAL)
2142 result = CODING_FINISH_INSUFFICIENT_DST;
2143
2144 /* If this is the last block of the text to be encoded, we must
2145 reset graphic planes and registers to the initial state, and
2146 flush out the carryover if any. */
2147 if (coding->mode & CODING_MODE_LAST_BLOCK)
2148 {
2149 ENCODE_RESET_PLANE_AND_REGISTER;
2150 if (COMPOSING_P (coding))
2151 *dst++ = ISO_CODE_ESC, *dst++ = '1';
2152 if (result == CODING_FINISH_INSUFFICIENT_SRC)
2153 {
2154 while (src < src_end && dst < dst_end)
2155 *dst++ = *src++;
2156 }
2157 }
2158 coding->consumed = src - source;
2159 coding->produced = coding->produced_char = dst - destination; 2136 coding->produced = coding->produced_char = dst - destination;
2160 return result;
2161 } 2137 }
2162 2138
2163 2139
2164 /*** 4. SJIS and BIG5 handlers ***/ 2140 /*** 4. SJIS and BIG5 handlers ***/
2165 2141
2233 b1 = temp / BIG5_SAME_ROW + 0xA1; \ 2209 b1 = temp / BIG5_SAME_ROW + 0xA1; \
2234 b2 = temp % BIG5_SAME_ROW; \ 2210 b2 = temp % BIG5_SAME_ROW; \
2235 b2 += b2 < 0x3F ? 0x40 : 0x62; \ 2211 b2 += b2 < 0x3F ? 0x40 : 0x62; \
2236 } while (0) 2212 } while (0)
2237 2213
2238 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \
2239 do { \
2240 int c_alt, charset_alt = (charset); \
2241 if (!NILP (translation_table) \
2242 && ((c_alt = translate_char (translation_table, \
2243 -1, (charset), c1, c2)) >= 0)) \
2244 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
2245 if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
2246 DECODE_CHARACTER_ASCII (c1); \
2247 else if (CHARSET_DIMENSION (charset_alt) == 1) \
2248 DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
2249 else \
2250 DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
2251 } while (0)
2252
2253 #define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \
2254 do { \
2255 int c_alt, charset_alt; \
2256 if (!NILP (translation_table) \
2257 && ((c_alt = translate_char (translation_table, -1, \
2258 charset, c1, c2)) \
2259 >= 0)) \
2260 SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
2261 else \
2262 charset_alt = charset; \
2263 if (charset_alt == charset_ascii) \
2264 *dst++ = c1; \
2265 else if (CHARSET_DIMENSION (charset_alt) == 1) \
2266 { \
2267 if (sjis_p && charset_alt == charset_katakana_jisx0201) \
2268 *dst++ = c1; \
2269 else if (sjis_p && charset_alt == charset_latin_jisx0201) \
2270 *dst++ = c1 & 0x7F; \
2271 else \
2272 { \
2273 *dst++ = charset_alt, *dst++ = c1; \
2274 coding->fake_multibyte = 1; \
2275 } \
2276 } \
2277 else \
2278 { \
2279 c1 &= 0x7F, c2 &= 0x7F; \
2280 if (sjis_p && (charset_alt == charset_jisx0208 \
2281 || charset_alt == charset_jisx0208_1978))\
2282 { \
2283 unsigned char s1, s2; \
2284 \
2285 ENCODE_SJIS (c1, c2, s1, s2); \
2286 *dst++ = s1, *dst++ = s2; \
2287 coding->fake_multibyte = 1; \
2288 } \
2289 else if (!sjis_p \
2290 && (charset_alt == charset_big5_1 \
2291 || charset_alt == charset_big5_2)) \
2292 { \
2293 unsigned char b1, b2; \
2294 \
2295 ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \
2296 *dst++ = b1, *dst++ = b2; \
2297 } \
2298 else \
2299 { \
2300 *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
2301 coding->fake_multibyte = 1; \
2302 } \
2303 } \
2304 coding->consumed_char++; \
2305 } while (0)
2306
2307 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2214 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2308 Check if a text is encoded in SJIS. If it is, return 2215 Check if a text is encoded in SJIS. If it is, return
2309 CODING_CATEGORY_MASK_SJIS, else return 0. */ 2216 CODING_CATEGORY_MASK_SJIS, else return 0. */
2310 2217
2311 int 2218 int
2312 detect_coding_sjis (src, src_end) 2219 detect_coding_sjis (src, src_end)
2313 unsigned char *src, *src_end; 2220 unsigned char *src, *src_end;
2314 { 2221 {
2315 unsigned char c; 2222 int c;
2316 2223 /* Dummy for ONE_MORE_BYTE. */
2317 while (src < src_end) 2224 struct coding_system dummy_coding;
2318 { 2225 struct coding_system *coding = &dummy_coding;
2319 c = *src++; 2226
2227 while (1)
2228 {
2229 ONE_MORE_BYTE (c);
2320 if ((c >= 0x80 && c < 0xA0) || c >= 0xE0) 2230 if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2321 { 2231 {
2322 if (src < src_end && *src++ < 0x40) 2232 ONE_MORE_BYTE (c);
2233 if (c < 0x40)
2323 return 0; 2234 return 0;
2324 } 2235 }
2325 } 2236 }
2237 label_end_of_loop:
2326 return CODING_CATEGORY_MASK_SJIS; 2238 return CODING_CATEGORY_MASK_SJIS;
2327 } 2239 }
2328 2240
2329 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2241 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2330 Check if a text is encoded in BIG5. If it is, return 2242 Check if a text is encoded in BIG5. If it is, return
2332 2244
2333 int 2245 int
2334 detect_coding_big5 (src, src_end) 2246 detect_coding_big5 (src, src_end)
2335 unsigned char *src, *src_end; 2247 unsigned char *src, *src_end;
2336 { 2248 {
2337 unsigned char c; 2249 int c;
2338 2250 /* Dummy for ONE_MORE_BYTE. */
2339 while (src < src_end) 2251 struct coding_system dummy_coding;
2340 { 2252 struct coding_system *coding = &dummy_coding;
2341 c = *src++; 2253
2254 while (1)
2255 {
2256 ONE_MORE_BYTE (c);
2342 if (c >= 0xA1) 2257 if (c >= 0xA1)
2343 { 2258 {
2344 if (src >= src_end) 2259 ONE_MORE_BYTE (c);
2345 break;
2346 c = *src++;
2347 if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) 2260 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2348 return 0; 2261 return 0;
2349 } 2262 }
2350 } 2263 }
2264 label_end_of_loop:
2351 return CODING_CATEGORY_MASK_BIG5; 2265 return CODING_CATEGORY_MASK_BIG5;
2352 } 2266 }
2353 2267
2354 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2268 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2355 Check if a text is encoded in UTF-8. If it is, return 2269 Check if a text is encoded in UTF-8. If it is, return
2367 detect_coding_utf_8 (src, src_end) 2281 detect_coding_utf_8 (src, src_end)
2368 unsigned char *src, *src_end; 2282 unsigned char *src, *src_end;
2369 { 2283 {
2370 unsigned char c; 2284 unsigned char c;
2371 int seq_maybe_bytes; 2285 int seq_maybe_bytes;
2372 2286 /* Dummy for ONE_MORE_BYTE. */
2373 while (src < src_end) 2287 struct coding_system dummy_coding;
2374 { 2288 struct coding_system *coding = &dummy_coding;
2375 c = *src++; 2289
2290 while (1)
2291 {
2292 ONE_MORE_BYTE (c);
2376 if (UTF_8_1_OCTET_P (c)) 2293 if (UTF_8_1_OCTET_P (c))
2377 continue; 2294 continue;
2378 else if (UTF_8_2_OCTET_LEADING_P (c)) 2295 else if (UTF_8_2_OCTET_LEADING_P (c))
2379 seq_maybe_bytes = 1; 2296 seq_maybe_bytes = 1;
2380 else if (UTF_8_3_OCTET_LEADING_P (c)) 2297 else if (UTF_8_3_OCTET_LEADING_P (c))
2388 else 2305 else
2389 return 0; 2306 return 0;
2390 2307
2391 do 2308 do
2392 { 2309 {
2393 if (src >= src_end) 2310 ONE_MORE_BYTE (c);
2394 return CODING_CATEGORY_MASK_UTF_8;
2395
2396 c = *src++;
2397 if (!UTF_8_EXTRA_OCTET_P (c)) 2311 if (!UTF_8_EXTRA_OCTET_P (c))
2398 return 0; 2312 return 0;
2399 seq_maybe_bytes--; 2313 seq_maybe_bytes--;
2400 } 2314 }
2401 while (seq_maybe_bytes > 0); 2315 while (seq_maybe_bytes > 0);
2402 } 2316 }
2403 2317
2318 label_end_of_loop:
2404 return CODING_CATEGORY_MASK_UTF_8; 2319 return CODING_CATEGORY_MASK_UTF_8;
2405 } 2320 }
2406 2321
2407 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2322 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2408 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or 2323 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
2422 2337
2423 int 2338 int
2424 detect_coding_utf_16 (src, src_end) 2339 detect_coding_utf_16 (src, src_end)
2425 unsigned char *src, *src_end; 2340 unsigned char *src, *src_end;
2426 { 2341 {
2427 if ((src + 1) >= src_end) return 0; 2342 unsigned char c1, c2;
2428 2343 /* Dummy for TWO_MORE_BYTES. */
2429 if ((src[0] == 0xFF) && (src[1] == 0xFE)) 2344 struct coding_system dummy_coding;
2345 struct coding_system *coding = &dummy_coding;
2346
2347 TWO_MORE_BYTES (c1, c2);
2348
2349 if ((c1 == 0xFF) && (c2 == 0xFE))
2430 return CODING_CATEGORY_MASK_UTF_16_LE; 2350 return CODING_CATEGORY_MASK_UTF_16_LE;
2431 else if ((src[0] == 0xFE) && (src[1] == 0xFF)) 2351 else if ((c1 == 0xFE) && (c2 == 0xFF))
2432 return CODING_CATEGORY_MASK_UTF_16_BE; 2352 return CODING_CATEGORY_MASK_UTF_16_BE;
2433 2353
2354 label_end_of_loop:
2434 return 0; 2355 return 0;
2435 } 2356 }
2436 2357
2437 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". 2358 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2438 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */ 2359 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
2439 2360
2440 int 2361 static void
2441 decode_coding_sjis_big5 (coding, source, destination, 2362 decode_coding_sjis_big5 (coding, source, destination,
2442 src_bytes, dst_bytes, sjis_p) 2363 src_bytes, dst_bytes, sjis_p)
2443 struct coding_system *coding; 2364 struct coding_system *coding;
2444 unsigned char *source, *destination; 2365 unsigned char *source, *destination;
2445 int src_bytes, dst_bytes; 2366 int src_bytes, dst_bytes;
2447 { 2368 {
2448 unsigned char *src = source; 2369 unsigned char *src = source;
2449 unsigned char *src_end = source + src_bytes; 2370 unsigned char *src_end = source + src_bytes;
2450 unsigned char *dst = destination; 2371 unsigned char *dst = destination;
2451 unsigned char *dst_end = destination + dst_bytes; 2372 unsigned char *dst_end = destination + dst_bytes;
2452 /* Since the maximum bytes produced by each loop is 4, we subtract 3 2373 /* SRC_BASE remembers the start position in source in each loop.
2453 from DST_END to assure overflow checking is necessary only at the 2374 The loop will be exited when there's not enough source code
2454 head of loop. */ 2375 (within macro ONE_MORE_BYTE), or when there's not enough
2455 unsigned char *adjusted_dst_end = dst_end - 3; 2376 destination area to produce a character (within macro
2456 Lisp_Object translation_table 2377 EMIT_CHAR). */
2457 = coding->translation_table_for_decode; 2378 unsigned char *src_base;
2458 int result = CODING_FINISH_NORMAL; 2379 Lisp_Object translation_table;
2459 2380
2460 if (!NILP (Venable_character_translation) && NILP (translation_table)) 2381 if (NILP (Venable_character_translation))
2461 translation_table = Vstandard_translation_table_for_decode; 2382 translation_table = Qnil;
2383 else
2384 {
2385 translation_table = coding->translation_table_for_decode;
2386 if (NILP (translation_table))
2387 translation_table = Vstandard_translation_table_for_decode;
2388 }
2462 2389
2463 coding->produced_char = 0; 2390 coding->produced_char = 0;
2464 coding->fake_multibyte = 0; 2391 while (1)
2465 while (src < src_end && (dst_bytes 2392 {
2466 ? (dst < adjusted_dst_end) 2393 int c, charset, c1, c2;
2467 : (dst < src - 3))) 2394
2468 { 2395 src_base = src;
2469 /* SRC_BASE remembers the start position in source in each loop. 2396 ONE_MORE_BYTE (c1);
2470 The loop will be exited when there's not enough source text 2397
2471 to analyze two-byte character (within macro ONE_MORE_BYTE). 2398 if (c1 < 0x80)
2472 In that case, SRC is reset to SRC_BASE before exiting. */ 2399 {
2473 unsigned char *src_base = src; 2400 charset = CHARSET_ASCII;
2474 unsigned char c1 = *src++, c2, c3, c4; 2401 if (c1 < 0x20)
2475
2476 if (c1 < 0x20)
2477 {
2478 if (c1 == '\r')
2479 { 2402 {
2480 if (coding->eol_type == CODING_EOL_CRLF) 2403 if (c1 == '\r')
2481 { 2404 {
2482 ONE_MORE_BYTE (c2); 2405 if (coding->eol_type == CODING_EOL_CRLF)
2483 if (c2 == '\n')
2484 *dst++ = c2;
2485 else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2486 { 2406 {
2487 result = CODING_FINISH_INCONSISTENT_EOL; 2407 ONE_MORE_BYTE (c2);
2488 goto label_end_of_loop_2; 2408 if (c2 == '\n')
2409 c1 = c2;
2410 else if (coding->mode
2411 & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2412 {
2413 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2414 goto label_end_of_loop;
2415 }
2416 else
2417 /* To process C2 again, SRC is subtracted by 1. */
2418 src--;
2489 } 2419 }
2490 else 2420 else if (coding->eol_type == CODING_EOL_CR)
2491 /* To process C2 again, SRC is subtracted by 1. */ 2421 c1 = '\n';
2492 *dst++ = c1, src--;
2493 } 2422 }
2494 else if (coding->eol_type == CODING_EOL_CR) 2423 else if (c1 == '\n'
2495 *dst++ = '\n'; 2424 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2496 else 2425 && (coding->eol_type == CODING_EOL_CR
2497 *dst++ = c1; 2426 || coding->eol_type == CODING_EOL_CRLF))
2427 {
2428 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2429 goto label_end_of_loop;
2430 }
2498 } 2431 }
2499 else if (c1 == '\n' 2432 }
2500 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) 2433 else
2501 && (coding->eol_type == CODING_EOL_CR
2502 || coding->eol_type == CODING_EOL_CRLF))
2503 {
2504 result = CODING_FINISH_INCONSISTENT_EOL;
2505 goto label_end_of_loop_2;
2506 }
2507 else
2508 *dst++ = c1;
2509 coding->produced_char++;
2510 }
2511 else if (c1 < 0x80)
2512 { 2434 {
2513 c2 = 0; /* avoid warning */
2514 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2515 }
2516 else
2517 {
2518 if (sjis_p) 2435 if (sjis_p)
2519 { 2436 {
2520 if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0)) 2437 if (c1 >= 0xF0)
2438 goto label_invalid_code;
2439 if (c1 < 0xA0 || c1 >= 0xE0)
2521 { 2440 {
2522 /* SJIS -> JISX0208 */ 2441 /* SJIS -> JISX0208 */
2523 ONE_MORE_BYTE (c2); 2442 ONE_MORE_BYTE (c2);
2524 if (c2 >= 0x40 && c2 != 0x7F && c2 <= 0xFC) 2443 if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2525 { 2444 goto label_invalid_code;
2526 DECODE_SJIS (c1, c2, c3, c4); 2445 DECODE_SJIS (c1, c2, c1, c2);
2527 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); 2446 charset = charset_jisx0208;
2528 }
2529 else
2530 goto label_invalid_code_2;
2531 }
2532 else if (c1 < 0xE0)
2533 /* SJIS -> JISX0201-Kana */
2534 {
2535 c2 = 0; /* avoid warning */
2536 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2537 /* dummy */ c2);
2538 } 2447 }
2539 else 2448 else
2540 goto label_invalid_code_1; 2449 /* SJIS -> JISX0201-Kana */
2450 charset = charset_katakana_jisx0201;
2541 } 2451 }
2542 else 2452 else
2543 { 2453 {
2544 /* BIG5 -> Big5 */ 2454 /* BIG5 -> Big5 */
2545 if (c1 >= 0xA1 && c1 <= 0xFE) 2455 if (c1 < 0xA1 || c1 > 0xFE)
2546 { 2456 goto label_invalid_code;
2547 ONE_MORE_BYTE (c2); 2457 ONE_MORE_BYTE (c2);
2548 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) 2458 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
2549 { 2459 goto label_invalid_code;
2550 int charset; 2460 DECODE_BIG5 (c1, c2, charset, c1, c2);
2551
2552 DECODE_BIG5 (c1, c2, charset, c3, c4);
2553 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2554 }
2555 else
2556 goto label_invalid_code_2;
2557 }
2558 else
2559 goto label_invalid_code_1;
2560 } 2461 }
2561 } 2462 }
2463
2464 c = DECODE_ISO_CHARACTER (charset, c1, c2);
2465 EMIT_CHAR (c);
2562 continue; 2466 continue;
2563 2467
2564 label_invalid_code_1: 2468 label_invalid_code:
2565 *dst++ = c1; 2469 coding->errors++;
2566 coding->produced_char++;
2567 coding->fake_multibyte = 1;
2568 continue;
2569
2570 label_invalid_code_2:
2571 *dst++ = c1; *dst++= c2;
2572 coding->produced_char += 2;
2573 coding->fake_multibyte = 1;
2574 continue;
2575
2576 label_end_of_loop:
2577 result = CODING_FINISH_INSUFFICIENT_SRC;
2578 label_end_of_loop_2:
2579 src = src_base; 2470 src = src_base;
2580 break; 2471 c = *src++;
2581 } 2472 EMIT_CHAR (c);
2582 2473 }
2583 if (src < src_end) 2474
2584 { 2475 label_end_of_loop:
2585 if (result == CODING_FINISH_NORMAL) 2476 coding->consumed = coding->consumed_char = src_base - source;
2586 result = CODING_FINISH_INSUFFICIENT_DST;
2587 else if (result != CODING_FINISH_INCONSISTENT_EOL
2588 && coding->mode & CODING_MODE_LAST_BLOCK)
2589 {
2590 src_bytes = src_end - src;
2591 if (dst_bytes && (dst_end - dst < src_bytes))
2592 src_bytes = dst_end - dst;
2593 bcopy (dst, src, src_bytes);
2594 src += src_bytes;
2595 dst += src_bytes;
2596 coding->fake_multibyte = 1;
2597 }
2598 }
2599
2600 coding->consumed = coding->consumed_char = src - source;
2601 coding->produced = dst - destination; 2477 coding->produced = dst - destination;
2602 return result; 2478 return;
2603 } 2479 }
2604 2480
2605 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". 2481 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2606 This function can encode `charset_ascii', `charset_katakana_jisx0201', 2482 This function can encode charsets `ascii', `katakana-jisx0201',
2607 `charset_jisx0208', `charset_big5_1', and `charset_big5-2'. We are 2483 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
2608 sure that all these charsets are registered as official charset 2484 are sure that all these charsets are registered as official charset
2609 (i.e. do not have extended leading-codes). Characters of other 2485 (i.e. do not have extended leading-codes). Characters of other
2610 charsets are produced without any encoding. If SJIS_P is 1, encode 2486 charsets are produced without any encoding. If SJIS_P is 1, encode
2611 SJIS text, else encode BIG5 text. */ 2487 SJIS text, else encode BIG5 text. */
2612 2488
2613 int 2489 static void
2614 encode_coding_sjis_big5 (coding, source, destination, 2490 encode_coding_sjis_big5 (coding, source, destination,
2615 src_bytes, dst_bytes, sjis_p) 2491 src_bytes, dst_bytes, sjis_p)
2616 struct coding_system *coding; 2492 struct coding_system *coding;
2617 unsigned char *source, *destination; 2493 unsigned char *source, *destination;
2618 int src_bytes, dst_bytes; 2494 int src_bytes, dst_bytes;
2620 { 2496 {
2621 unsigned char *src = source; 2497 unsigned char *src = source;
2622 unsigned char *src_end = source + src_bytes; 2498 unsigned char *src_end = source + src_bytes;
2623 unsigned char *dst = destination; 2499 unsigned char *dst = destination;
2624 unsigned char *dst_end = destination + dst_bytes; 2500 unsigned char *dst_end = destination + dst_bytes;
2625 /* Since the maximum bytes produced by each loop is 2, we subtract 1 2501 /* SRC_BASE remembers the start position in source in each loop.
2626 from DST_END to assure overflow checking is necessary only at the 2502 The loop will be exited when there's not enough source text to
2627 head of loop. */ 2503 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2628 unsigned char *adjusted_dst_end = dst_end - 1; 2504 there's not enough destination area to produce encoded codes
2629 Lisp_Object translation_table 2505 (within macro EMIT_BYTES). */
2630 = coding->translation_table_for_encode; 2506 unsigned char *src_base;
2631 int result = CODING_FINISH_NORMAL; 2507 Lisp_Object translation_table;
2632 2508
2633 if (!NILP (Venable_character_translation) && NILP (translation_table)) 2509 if (NILP (Venable_character_translation))
2634 translation_table = Vstandard_translation_table_for_encode; 2510 translation_table = Qnil;
2635 2511 else
2636 coding->consumed_char = 0; 2512 {
2637 coding->fake_multibyte = 0; 2513 translation_table = coding->translation_table_for_decode;
2638 while (src < src_end && (dst_bytes 2514 if (NILP (translation_table))
2639 ? (dst < adjusted_dst_end) 2515 translation_table = Vstandard_translation_table_for_decode;
2640 : (dst < src - 1))) 2516 }
2641 { 2517
2642 /* SRC_BASE remembers the start position in source in each loop. 2518 while (1)
2643 The loop will be exited when there's not enough source text 2519 {
2644 to analyze multi-byte codes (within macros ONE_MORE_BYTE and 2520 int c, charset, c1, c2;
2645 TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE 2521
2646 before exiting. */ 2522 src_base = src;
2647 unsigned char *src_base = src; 2523 ONE_MORE_CHAR (c);
2648 unsigned char c1 = *src++, c2, c3, c4; 2524
2649 2525 /* Now encode the character C. */
2650 switch (emacs_code_class[c1]) 2526 if (SINGLE_BYTE_CHAR_P (c))
2651 { 2527 {
2652 case EMACS_ascii_code: 2528 switch (c)
2653 ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2654 break;
2655
2656 case EMACS_control_code:
2657 *dst++ = c1;
2658 coding->consumed_char++;
2659 break;
2660
2661 case EMACS_carriage_return_code:
2662 if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2663 { 2529 {
2664 *dst++ = c1; 2530 case '\r':
2665 coding->consumed_char++; 2531 if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2666 break; 2532 {
2533 EMIT_ONE_BYTE (c);
2534 break;
2535 }
2536 c = '\n';
2537 case '\n':
2538 if (coding->eol_type == CODING_EOL_CRLF)
2539 {
2540 EMIT_TWO_BYTES ('\r', c);
2541 break;
2542 }
2543 else if (coding->eol_type == CODING_EOL_CR)
2544 c = '\r';
2545 default:
2546 EMIT_ONE_BYTE (c);
2667 } 2547 }
2668 /* fall down to treat '\r' as '\n' ... */ 2548 }
2669 2549 else
2670 case EMACS_linefeed_code: 2550 {
2671 if (coding->eol_type == CODING_EOL_LF 2551 SPLIT_CHAR (c, charset, c1, c2);
2672 || coding->eol_type == CODING_EOL_UNDECIDED) 2552 if (sjis_p)
2673 *dst++ = '\n'; 2553 {
2674 else if (coding->eol_type == CODING_EOL_CRLF) 2554 if (charset == charset_jisx0208
2675 *dst++ = '\r', *dst++ = '\n'; 2555 || charset == charset_jisx0208_1978)
2556 {
2557 ENCODE_SJIS (c1, c2, c1, c2);
2558 EMIT_TWO_BYTES (c1, c2);
2559 }
2560 else if (charset == charset_latin_jisx0201)
2561 EMIT_ONE_BYTE (c1);
2562 else
2563 /* There's no way other than producing the internal
2564 codes as is. */
2565 EMIT_BYTES (src_base, src);
2566 }
2676 else 2567 else
2677 *dst++ = '\r'; 2568 {
2678 coding->consumed_char++; 2569 if (charset == charset_big5_1 || charset == charset_big5_2)
2679 break; 2570 {
2680 2571 ENCODE_BIG5 (charset, c1, c2, c1, c2);
2681 case EMACS_leading_code_2: 2572 EMIT_TWO_BYTES (c1, c2);
2682 ONE_MORE_BYTE (c2); 2573 }
2683 ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3); 2574 else
2684 break; 2575 /* There's no way other than producing the internal
2685 2576 codes as is. */
2686 case EMACS_leading_code_3: 2577 EMIT_BYTES (src_base, src);
2687 TWO_MORE_BYTES (c2, c3); 2578 }
2688 ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3); 2579 }
2689 break; 2580 coding->consumed_char++;
2690 2581 }
2691 case EMACS_leading_code_4: 2582
2692 THREE_MORE_BYTES (c2, c3, c4); 2583 label_end_of_loop:
2693 ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4); 2584 coding->consumed = src_base - source;
2694 break;
2695
2696 default: /* i.e. case EMACS_invalid_code: */
2697 *dst++ = c1;
2698 coding->consumed_char++;
2699 }
2700 continue;
2701
2702 label_end_of_loop:
2703 result = CODING_FINISH_INSUFFICIENT_SRC;
2704 src = src_base;
2705 break;
2706 }
2707
2708 if (result == CODING_FINISH_NORMAL
2709 && src < src_end)
2710 result = CODING_FINISH_INSUFFICIENT_DST;
2711 coding->consumed = src - source;
2712 coding->produced = coding->produced_char = dst - destination; 2585 coding->produced = coding->produced_char = dst - destination;
2713 return result;
2714 } 2586 }
2715 2587
2716 2588
2717 /*** 5. CCL handlers ***/ 2589 /*** 5. CCL handlers ***/
2718 2590
2724 int 2596 int
2725 detect_coding_ccl (src, src_end) 2597 detect_coding_ccl (src, src_end)
2726 unsigned char *src, *src_end; 2598 unsigned char *src, *src_end;
2727 { 2599 {
2728 unsigned char *valid; 2600 unsigned char *valid;
2601 int c;
2602 /* Dummy for ONE_MORE_BYTE. */
2603 struct coding_system dummy_coding;
2604 struct coding_system *coding = &dummy_coding;
2729 2605
2730 /* No coding system is assigned to coding-category-ccl. */ 2606 /* No coding system is assigned to coding-category-ccl. */
2731 if (!coding_system_table[CODING_CATEGORY_IDX_CCL]) 2607 if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
2732 return 0; 2608 return 0;
2733 2609
2734 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; 2610 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2735 while (src < src_end) 2611 while (1)
2736 { 2612 {
2737 if (! valid[*src]) return 0; 2613 ONE_MORE_BYTE (c);
2738 src++; 2614 if (! valid[c])
2739 } 2615 return 0;
2616 }
2617 label_end_of_loop:
2740 return CODING_CATEGORY_MASK_CCL; 2618 return CODING_CATEGORY_MASK_CCL;
2741 } 2619 }
2742 2620
2743 2621
2744 /*** 6. End-of-line handlers ***/ 2622 /*** 6. End-of-line handlers ***/
2745 2623
2746 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". 2624 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2747 This function is called only when `coding->eol_type' is 2625
2748 CODING_EOL_CRLF or CODING_EOL_CR. */ 2626 static void
2749
2750 int
2751 decode_eol (coding, source, destination, src_bytes, dst_bytes) 2627 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2752 struct coding_system *coding; 2628 struct coding_system *coding;
2753 unsigned char *source, *destination; 2629 unsigned char *source, *destination;
2754 int src_bytes, dst_bytes; 2630 int src_bytes, dst_bytes;
2755 { 2631 {
2756 unsigned char *src = source; 2632 unsigned char *src = source;
2757 unsigned char *src_end = source + src_bytes;
2758 unsigned char *dst = destination; 2633 unsigned char *dst = destination;
2759 unsigned char *dst_end = destination + dst_bytes; 2634 unsigned char *src_end = src + src_bytes;
2760 unsigned char c; 2635 unsigned char *dst_end = dst + dst_bytes;
2761 int result = CODING_FINISH_NORMAL; 2636 Lisp_Object translation_table;
2762 2637 /* SRC_BASE remembers the start position in source in each loop.
2763 coding->fake_multibyte = 0; 2638 The loop will be exited when there's not enough source code
2764 2639 (within macro ONE_MORE_BYTE), or when there's not enough
2765 if (src_bytes <= 0) 2640 destination area to produce a character (within macro
2766 { 2641 EMIT_CHAR). */
2767 coding->produced = coding->produced_char = 0; 2642 unsigned char *src_base;
2768 coding->consumed = coding->consumed_char = 0; 2643 int c;
2769 return result; 2644
2770 } 2645 translation_table = Qnil;
2771
2772 switch (coding->eol_type) 2646 switch (coding->eol_type)
2773 { 2647 {
2774 case CODING_EOL_CRLF: 2648 case CODING_EOL_CRLF:
2775 { 2649 while (1)
2776 /* Since the maximum bytes produced by each loop is 2, we 2650 {
2777 subtract 1 from DST_END to assure overflow checking is 2651 src_base = src;
2778 necessary only at the head of loop. */ 2652 ONE_MORE_BYTE (c);
2779 unsigned char *adjusted_dst_end = dst_end - 1; 2653 if (c == '\r')
2780 2654 {
2781 while (src < src_end && (dst_bytes 2655 ONE_MORE_BYTE (c);
2782 ? (dst < adjusted_dst_end) 2656 if (c != '\n')
2783 : (dst < src - 1))) 2657 {
2784 { 2658 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2785 unsigned char *src_base = src; 2659 {
2786 2660 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2787 c = *src++; 2661 goto label_end_of_loop;
2788 if (c == '\r') 2662 }
2789 { 2663 src--;
2790 ONE_MORE_BYTE (c); 2664 c = '\r';
2791 if (c == '\n') 2665 }
2792 *dst++ = c; 2666 }
2793 else 2667 else if (c == '\n'
2794 { 2668 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2795 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) 2669 {
2796 { 2670 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2797 result = CODING_FINISH_INCONSISTENT_EOL; 2671 goto label_end_of_loop;
2798 goto label_end_of_loop_2; 2672 }
2799 } 2673 EMIT_CHAR (c);
2800 src--; 2674 }
2801 *dst++ = '\r';
2802 if (BASE_LEADING_CODE_P (c))
2803 coding->fake_multibyte = 1;
2804 }
2805 }
2806 else if (c == '\n'
2807 && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2808 {
2809 result = CODING_FINISH_INCONSISTENT_EOL;
2810 goto label_end_of_loop_2;
2811 }
2812 else
2813 {
2814 *dst++ = c;
2815 if (BASE_LEADING_CODE_P (c))
2816 coding->fake_multibyte = 1;
2817 }
2818 continue;
2819
2820 label_end_of_loop:
2821 result = CODING_FINISH_INSUFFICIENT_SRC;
2822 label_end_of_loop_2:
2823 src = src_base;
2824 break;
2825 }
2826 if (src < src_end)
2827 {
2828 if (result == CODING_FINISH_NORMAL)
2829 result = CODING_FINISH_INSUFFICIENT_DST;
2830 else if (result != CODING_FINISH_INCONSISTENT_EOL
2831 && coding->mode & CODING_MODE_LAST_BLOCK)
2832 {
2833 /* This is the last block of the text to be decoded.
2834 We flush out all remaining codes. */
2835 src_bytes = src_end - src;
2836 if (dst_bytes && (dst_end - dst < src_bytes))
2837 src_bytes = dst_end - dst;
2838 bcopy (src, dst, src_bytes);
2839 dst += src_bytes;
2840 src += src_bytes;
2841 }
2842 }
2843 }
2844 break; 2675 break;
2845 2676
2846 case CODING_EOL_CR: 2677 case CODING_EOL_CR:
2847 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) 2678 while (1)
2848 { 2679 {
2849 while (src < src_end) 2680 src_base = src;
2681 ONE_MORE_BYTE (c);
2682 if (c == '\n')
2850 { 2683 {
2851 if ((c = *src++) == '\n') 2684 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2852 break; 2685 {
2853 if (BASE_LEADING_CODE_P (c)) 2686 coding->result = CODING_FINISH_INCONSISTENT_EOL;
2854 coding->fake_multibyte = 1; 2687 goto label_end_of_loop;
2688 }
2855 } 2689 }
2856 if (*--src == '\n') 2690 else if (c == '\r')
2857 { 2691 c = '\n';
2858 src_bytes = src - source; 2692 EMIT_CHAR (c);
2859 result = CODING_FINISH_INCONSISTENT_EOL; 2693 }
2860 }
2861 }
2862 if (dst_bytes && src_bytes > dst_bytes)
2863 {
2864 result = CODING_FINISH_INSUFFICIENT_DST;
2865 src_bytes = dst_bytes;
2866 }
2867 if (dst_bytes)
2868 bcopy (source, destination, src_bytes);
2869 else
2870 safe_bcopy (source, destination, src_bytes);
2871 src = source + src_bytes;
2872 while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2873 break; 2694 break;
2874 2695
2875 default: /* i.e. case: CODING_EOL_LF */ 2696 default: /* no need for EOL handling */
2876 if (dst_bytes && src_bytes > dst_bytes) 2697 while (1)
2877 { 2698 {
2878 result = CODING_FINISH_INSUFFICIENT_DST; 2699 src_base = src;
2879 src_bytes = dst_bytes; 2700 ONE_MORE_BYTE (c);
2880 } 2701 EMIT_CHAR (c);
2881 if (dst_bytes) 2702 }
2882 bcopy (source, destination, src_bytes); 2703 }
2883 else 2704
2884 safe_bcopy (source, destination, src_bytes); 2705 label_end_of_loop:
2885 src += src_bytes; 2706 coding->consumed = coding->consumed_char = src_base - source;
2886 dst += src_bytes; 2707 coding->produced = dst - destination;
2887 coding->fake_multibyte = 1; 2708 return;
2888 break;
2889 }
2890
2891 coding->consumed = coding->consumed_char = src - source;
2892 coding->produced = coding->produced_char = dst - destination;
2893 return result;
2894 } 2709 }
2895 2710
2896 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode 2711 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
2897 format of end-of-line according to `coding->eol_type'. If 2712 format of end-of-line according to `coding->eol_type'. It also
2898 `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code 2713 converts multibyte form 8-bit characters to unibyte if
2899 '\r' in source text also means end-of-line. */ 2714 CODING->src_multibyte is nonzero. If `coding->mode &
2900 2715 CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
2901 int 2716 also means end-of-line. */
2717
2718 static void
2902 encode_eol (coding, source, destination, src_bytes, dst_bytes) 2719 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2903 struct coding_system *coding; 2720 struct coding_system *coding;
2904 unsigned char *source, *destination; 2721 unsigned char *source, *destination;
2905 int src_bytes, dst_bytes; 2722 int src_bytes, dst_bytes;
2906 { 2723 {
2907 unsigned char *src = source; 2724 unsigned char *src = source;
2908 unsigned char *dst = destination; 2725 unsigned char *dst = destination;
2909 int result = CODING_FINISH_NORMAL; 2726 unsigned char *src_end = src + src_bytes;
2910 2727 unsigned char *dst_end = dst + dst_bytes;
2911 coding->fake_multibyte = 0; 2728 Lisp_Object translation_table;
2729 /* SRC_BASE remembers the start position in source in each loop.
2730 The loop will be exited when there's not enough source text to
2731 analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2732 there's not enough destination area to produce encoded codes
2733 (within macro EMIT_BYTES). */
2734 unsigned char *src_base;
2735 int c;
2736 int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
2737
2738 translation_table = Qnil;
2739 if (coding->src_multibyte
2740 && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
2741 {
2742 src_end--;
2743 src_bytes--;
2744 coding->result = CODING_FINISH_INSUFFICIENT_SRC;
2745 }
2912 2746
2913 if (coding->eol_type == CODING_EOL_CRLF) 2747 if (coding->eol_type == CODING_EOL_CRLF)
2914 { 2748 {
2915 unsigned char c; 2749 while (src < src_end)
2916 unsigned char *src_end = source + src_bytes; 2750 {
2917 unsigned char *dst_end = destination + dst_bytes; 2751 src_base = src;
2918 /* Since the maximum bytes produced by each loop is 2, we
2919 subtract 1 from DST_END to assure overflow checking is
2920 necessary only at the head of loop. */
2921 unsigned char *adjusted_dst_end = dst_end - 1;
2922
2923 while (src < src_end && (dst_bytes
2924 ? (dst < adjusted_dst_end)
2925 : (dst < src - 1)))
2926 {
2927 c = *src++; 2752 c = *src++;
2928 if (c == '\n' 2753 if (c >= 0x20)
2929 || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) 2754 EMIT_ONE_BYTE (c);
2930 *dst++ = '\r', *dst++ = '\n'; 2755 else if (c == '\n' || (c == '\r' && selective_display))
2756 EMIT_TWO_BYTES ('\r', '\n');
2931 else 2757 else
2932 { 2758 EMIT_ONE_BYTE (c);
2933 *dst++ = c; 2759 }
2934 if (BASE_LEADING_CODE_P (c)) 2760 label_end_of_loop:
2935 coding->fake_multibyte = 1;
2936 }
2937 }
2938 if (src < src_end)
2939 result = CODING_FINISH_INSUFFICIENT_DST;
2940 } 2761 }
2941 else 2762 else
2942 { 2763 {
2943 unsigned char c; 2764 if (src_bytes <= dst_bytes)
2944 2765 {
2945 if (dst_bytes && src_bytes > dst_bytes) 2766 safe_bcopy (src, dst, src_bytes);
2946 { 2767 src_base = src_end;
2947 src_bytes = dst_bytes; 2768 dst += src_bytes;
2948 result = CODING_FINISH_INSUFFICIENT_DST; 2769 }
2949 }
2950 if (dst_bytes)
2951 bcopy (source, destination, src_bytes);
2952 else 2770 else
2953 safe_bcopy (source, destination, src_bytes); 2771 {
2954 dst_bytes = src_bytes; 2772 if (coding->src_multibyte
2773 && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
2774 dst_bytes--;
2775 safe_bcopy (src, dst, dst_bytes);
2776 src_base = src + dst_bytes;
2777 dst = destination + dst_bytes;
2778 coding->result = CODING_FINISH_INSUFFICIENT_DST;
2779 }
2955 if (coding->eol_type == CODING_EOL_CR) 2780 if (coding->eol_type == CODING_EOL_CR)
2956 { 2781 {
2957 while (src_bytes--) 2782 for (src = destination; src < dst; src++)
2958 { 2783 if (*src == '\n') *src = '\r';
2959 if ((c = *dst++) == '\n') 2784 }
2960 dst[-1] = '\r'; 2785 else if (selective_display)
2961 else if (BASE_LEADING_CODE_P (c)) 2786 {
2962 coding->fake_multibyte = 1; 2787 for (src = destination; src < dst; src++)
2963 } 2788 if (*src == '\r') *src = '\n';
2964 } 2789 }
2965 else 2790 }
2966 { 2791 if (coding->src_multibyte)
2967 if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) 2792 dst = destination + str_as_unibyte (destination, dst - destination);
2968 { 2793
2969 while (src_bytes--) 2794 coding->consumed = src_base - source;
2970 if (*dst++ == '\r') dst[-1] = '\n'; 2795 coding->produced = dst - destination;
2971 }
2972 coding->fake_multibyte = 1;
2973 }
2974 src = source + dst_bytes;
2975 dst = destination + dst_bytes;
2976 }
2977
2978 coding->consumed = coding->consumed_char = src - source;
2979 coding->produced = coding->produced_char = dst - destination;
2980 return result;
2981 } 2796 }
2982 2797
2983 2798
2984 /*** 7. C library functions ***/ 2799 /*** 7. C library functions ***/
2985 2800
3784 3599
3785 tmp = Fget (val, Qeol_type); 3600 tmp = Fget (val, Qeol_type);
3786 if (VECTORP (tmp)) 3601 if (VECTORP (tmp))
3787 val = XVECTOR (tmp)->contents[coding->eol_type]; 3602 val = XVECTOR (tmp)->contents[coding->eol_type];
3788 } 3603 }
3789 setup_coding_system (val, coding); 3604
3790 /* Set this again because setup_coding_system reset this member. */ 3605 /* Set up this new coding system while preserving some slots. */
3791 coding->heading_ascii = skip; 3606 {
3607 int src_multibyte = coding->src_multibyte;
3608 int dst_multibyte = coding->dst_multibyte;
3609
3610 setup_coding_system (val, coding);
3611 coding->src_multibyte = src_multibyte;
3612 coding->dst_multibyte = dst_multibyte;
3613 coding->heading_ascii = skip;
3614 }
3792 } 3615 }
3793 3616
3794 /* Detect how end-of-line of a text of length SRC_BYTES pointed by 3617 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3795 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF, 3618 SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3796 CODING_EOL_CR, and CODING_EOL_UNDECIDED. 3619 CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3967 } 3790 }
3968 3791
3969 val = Fget (coding->symbol, Qeol_type); 3792 val = Fget (coding->symbol, Qeol_type);
3970 if (VECTORP (val) && XVECTOR (val)->size == 3) 3793 if (VECTORP (val) && XVECTOR (val)->size == 3)
3971 { 3794 {
3795 int src_multibyte = coding->src_multibyte;
3796 int dst_multibyte = coding->dst_multibyte;
3797
3972 setup_coding_system (XVECTOR (val)->contents[eol_type], coding); 3798 setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3799 coding->src_multibyte = src_multibyte;
3800 coding->dst_multibyte = dst_multibyte;
3973 coding->heading_ascii = skip; 3801 coding->heading_ascii = skip;
3974 } 3802 }
3975 } 3803 }
3976 3804
3977 #define CONVERSION_BUFFER_EXTRA_ROOM 256 3805 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3978 3806
3979 #define DECODING_BUFFER_MAG(coding) \ 3807 #define DECODING_BUFFER_MAG(coding) \
3980 (coding->type == coding_type_iso2022 \ 3808 (coding->type == coding_type_iso2022 \
3981 ? 3 \ 3809 ? 3 \
3982 : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \ 3810 : (coding->type == coding_type_ccl \
3983 ? 2 \ 3811 ? coding->spec.ccl.decoder.buf_magnification \
3984 : (coding->type == coding_type_raw_text \ 3812 : 2))
3985 ? 1 \
3986 : (coding->type == coding_type_ccl \
3987 ? coding->spec.ccl.decoder.buf_magnification \
3988 : 2))))
3989 3813
3990 /* Return maximum size (bytes) of a buffer enough for decoding 3814 /* Return maximum size (bytes) of a buffer enough for decoding
3991 SRC_BYTES of text encoded in CODING. */ 3815 SRC_BYTES of text encoded in CODING. */
3992 3816
3993 int 3817 int
4009 { 3833 {
4010 int magnification; 3834 int magnification;
4011 3835
4012 if (coding->type == coding_type_ccl) 3836 if (coding->type == coding_type_ccl)
4013 magnification = coding->spec.ccl.encoder.buf_magnification; 3837 magnification = coding->spec.ccl.encoder.buf_magnification;
3838 else if (CODING_REQUIRE_ENCODING (coding))
3839 magnification = 3;
4014 else 3840 else
4015 magnification = 3; 3841 magnification = 1;
4016 3842
4017 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM); 3843 return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4018 } 3844 }
4019 3845
4020 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE 3846 #ifndef MINIMUM_CONVERSION_BUFFER_SIZE
4058 3884
4059 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; 3885 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4060 3886
4061 coding->produced = ccl_driver (ccl, source, destination, 3887 coding->produced = ccl_driver (ccl, source, destination,
4062 src_bytes, dst_bytes, &(coding->consumed)); 3888 src_bytes, dst_bytes, &(coding->consumed));
4063 coding->produced_char 3889 if (encodep)
4064 = (encodep 3890 coding->produced_char = coding->produced;
4065 ? coding->produced 3891 else
4066 : multibyte_chars_in_text (destination, coding->produced)); 3892 {
4067 coding->consumed_char 3893 int bytes
4068 = multibyte_chars_in_text (source, coding->consumed); 3894 = dst_bytes ? dst_bytes : source + coding->consumed - destination;
3895 coding->produced = str_as_multibyte (destination, bytes,
3896 coding->produced,
3897 &(coding->produced_char));
3898 }
4069 3899
4070 switch (ccl->status) 3900 switch (ccl->status)
4071 { 3901 {
4072 case CCL_STAT_SUSPEND_BY_SRC: 3902 case CCL_STAT_SUSPEND_BY_SRC:
4073 result = CODING_FINISH_INSUFFICIENT_SRC; 3903 result = CODING_FINISH_INSUFFICIENT_SRC;
4086 return result; 3916 return result;
4087 } 3917 }
4088 3918
4089 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before 3919 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
4090 decoding, it may detect coding system and format of end-of-line if 3920 decoding, it may detect coding system and format of end-of-line if
4091 those are not yet decided. 3921 those are not yet decided. The source should be unibyte, the
4092 3922 result is multibyte if CODING->dst_multibyte is nonzero, else
4093 This function does not make full use of DESTINATION buffer. For 3923 unibyte. */
4094 instance, if coding->type is coding_type_iso2022, it uses only
4095 (DST_BYTES - 7) bytes of DESTINATION buffer. In the case that
4096 DST_BYTES is decided by the function decoding_buffer_size, it
4097 contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
4098 So, this function can decode the full SOURCE. But, in the other
4099 case, if you want to avoid carry over, you must supply at least 7
4100 bytes more area in DESTINATION buffer than expected maximum bytes
4101 that will be produced by this function. */
4102 3924
4103 int 3925 int
4104 decode_coding (coding, source, destination, src_bytes, dst_bytes) 3926 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4105 struct coding_system *coding; 3927 struct coding_system *coding;
4106 unsigned char *source, *destination; 3928 unsigned char *source, *destination;
4107 int src_bytes, dst_bytes; 3929 int src_bytes, dst_bytes;
4108 { 3930 {
4109 int result;
4110
4111 if (src_bytes <= 0
4112 && coding->type != coding_type_ccl
4113 && ! (coding->mode & CODING_MODE_LAST_BLOCK
4114 && CODING_REQUIRE_FLUSHING (coding)))
4115 {
4116 coding->produced = coding->produced_char = 0;
4117 coding->consumed = coding->consumed_char = 0;
4118 coding->fake_multibyte = 0;
4119 return CODING_FINISH_NORMAL;
4120 }
4121
4122 if (coding->type == coding_type_undecided) 3931 if (coding->type == coding_type_undecided)
4123 detect_coding (coding, source, src_bytes); 3932 detect_coding (coding, source, src_bytes);
4124 3933
4125 if (coding->eol_type == CODING_EOL_UNDECIDED) 3934 if (coding->eol_type == CODING_EOL_UNDECIDED)
4126 detect_eol (coding, source, src_bytes); 3935 detect_eol (coding, source, src_bytes);
4127 3936
3937 coding->produced = coding->produced_char = 0;
3938 coding->consumed = coding->consumed_char = 0;
3939 coding->errors = 0;
3940 coding->result = CODING_FINISH_NORMAL;
3941
4128 switch (coding->type) 3942 switch (coding->type)
4129 { 3943 {
3944 case coding_type_sjis:
3945 decode_coding_sjis_big5 (coding, source, destination,
3946 src_bytes, dst_bytes, 1);
3947 break;
3948
3949 case coding_type_iso2022:
3950 decode_coding_iso2022 (coding, source, destination,
3951 src_bytes, dst_bytes);
3952 break;
3953
3954 case coding_type_big5:
3955 decode_coding_sjis_big5 (coding, source, destination,
3956 src_bytes, dst_bytes, 0);
3957 break;
3958
4130 case coding_type_emacs_mule: 3959 case coding_type_emacs_mule:
4131 case coding_type_undecided: 3960 decode_coding_emacs_mule (coding, source, destination,
4132 case coding_type_raw_text: 3961 src_bytes, dst_bytes);
4133 if (coding->eol_type == CODING_EOL_LF
4134 || coding->eol_type == CODING_EOL_UNDECIDED)
4135 goto label_no_conversion;
4136 result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
4137 break; 3962 break;
4138 3963
4139 case coding_type_sjis: 3964 case coding_type_ccl:
4140 result = decode_coding_sjis_big5 (coding, source, destination, 3965 ccl_coding_driver (coding, source, destination,
4141 src_bytes, dst_bytes, 1); 3966 src_bytes, dst_bytes, 0);
4142 break; 3967 break;
4143 3968
4144 case coding_type_iso2022: 3969 default:
4145 result = decode_coding_iso2022 (coding, source, destination, 3970 decode_eol (coding, source, destination, src_bytes, dst_bytes);
4146 src_bytes, dst_bytes); 3971 }
4147 break; 3972
4148 3973 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4149 case coding_type_big5: 3974 && coding->consumed == src_bytes)
4150 result = decode_coding_sjis_big5 (coding, source, destination, 3975 coding->result = CODING_FINISH_NORMAL;
4151 src_bytes, dst_bytes, 0); 3976
4152 break; 3977 if (coding->mode & CODING_MODE_LAST_BLOCK
4153 3978 && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4154 case coding_type_ccl: 3979 {
4155 result = ccl_coding_driver (coding, source, destination, 3980 unsigned char *src = source + coding->consumed;
4156 src_bytes, dst_bytes, 0); 3981 unsigned char *dst = destination + coding->produced;
4157 break; 3982
4158 3983 src_bytes -= coding->consumed;
4159 default: /* i.e. case coding_type_no_conversion: */ 3984 coding->errors++;
4160 label_no_conversion: 3985 if (COMPOSING_P (coding))
4161 if (dst_bytes && src_bytes > dst_bytes) 3986 DECODE_COMPOSITION_END ('1');
4162 { 3987 while (src_bytes--)
4163 coding->produced = dst_bytes; 3988 {
4164 result = CODING_FINISH_INSUFFICIENT_DST; 3989 int c = *src++;
4165 } 3990 dst += CHAR_STRING (c, dst);
4166 else 3991 coding->produced_char++;
4167 { 3992 }
4168 coding->produced = src_bytes; 3993 coding->consumed = coding->consumed_char = src - source;
4169 result = CODING_FINISH_NORMAL; 3994 coding->produced = dst - destination;
4170 } 3995 }
4171 if (dst_bytes) 3996
4172 bcopy (source, destination, coding->produced); 3997 if (!coding->dst_multibyte)
4173 else 3998 {
4174 safe_bcopy (source, destination, coding->produced); 3999 coding->produced = str_as_unibyte (destination, coding->produced);
4175 coding->fake_multibyte = 1; 4000 coding->produced_char = coding->produced;
4176 coding->consumed 4001 }
4177 = coding->consumed_char = coding->produced_char = coding->produced; 4002
4178 break; 4003 return coding->result;
4179 } 4004 }
4180 4005
4181 return result; 4006 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
4182 } 4007 multibyteness of the source is CODING->src_multibyte, the
4183 4008 multibyteness of the result is always unibyte. */
4184 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".
4185
4186 This function does not make full use of DESTINATION buffer. For
4187 instance, if coding->type is coding_type_iso2022, it uses only
4188 (DST_BYTES - 20) bytes of DESTINATION buffer. In the case that
4189 DST_BYTES is decided by the function encoding_buffer_size, it
4190 contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
4191 So, this function can encode the full SOURCE. But, in the other
4192 case, if you want to avoid carry over, you must supply at least 20
4193 bytes more area in DESTINATION buffer than expected maximum bytes
4194 that will be produced by this function. */
4195 4009
4196 int 4010 int
4197 encode_coding (coding, source, destination, src_bytes, dst_bytes) 4011 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4198 struct coding_system *coding; 4012 struct coding_system *coding;
4199 unsigned char *source, *destination; 4013 unsigned char *source, *destination;
4200 int src_bytes, dst_bytes; 4014 int src_bytes, dst_bytes;
4201 { 4015 {
4202 int result; 4016 coding->produced = coding->produced_char = 0;
4203 4017 coding->consumed = coding->consumed_char = 0;
4204 if (src_bytes <= 0 4018 coding->errors = 0;
4205 && ! (coding->mode & CODING_MODE_LAST_BLOCK 4019 coding->result = CODING_FINISH_NORMAL;
4206 && CODING_REQUIRE_FLUSHING (coding)))
4207 {
4208 coding->produced = coding->produced_char = 0;
4209 coding->consumed = coding->consumed_char = 0;
4210 coding->fake_multibyte = 0;
4211 return CODING_FINISH_NORMAL;
4212 }
4213 4020
4214 switch (coding->type) 4021 switch (coding->type)
4215 { 4022 {
4023 case coding_type_sjis:
4024 encode_coding_sjis_big5 (coding, source, destination,
4025 src_bytes, dst_bytes, 1);
4026 break;
4027
4028 case coding_type_iso2022:
4029 encode_coding_iso2022 (coding, source, destination,
4030 src_bytes, dst_bytes);
4031 break;
4032
4033 case coding_type_big5:
4034 encode_coding_sjis_big5 (coding, source, destination,
4035 src_bytes, dst_bytes, 0);
4036 break;
4037
4216 case coding_type_emacs_mule: 4038 case coding_type_emacs_mule:
4217 case coding_type_undecided: 4039 encode_coding_emacs_mule (coding, source, destination,
4218 case coding_type_raw_text: 4040 src_bytes, dst_bytes);
4219 if (coding->eol_type == CODING_EOL_LF
4220 || coding->eol_type == CODING_EOL_UNDECIDED)
4221 goto label_no_conversion;
4222 result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
4223 break; 4041 break;
4224 4042
4225 case coding_type_sjis: 4043 case coding_type_ccl:
4226 result = encode_coding_sjis_big5 (coding, source, destination, 4044 ccl_coding_driver (coding, source, destination,
4227 src_bytes, dst_bytes, 1); 4045 src_bytes, dst_bytes, 1);
4228 break; 4046 break;
4229 4047
4230 case coding_type_iso2022: 4048 default:
4231 result = encode_coding_iso2022 (coding, source, destination, 4049 encode_eol (coding, source, destination, src_bytes, dst_bytes);
4232 src_bytes, dst_bytes); 4050 }
4233 break; 4051
4234 4052 if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4235 case coding_type_big5: 4053 && coding->consumed == src_bytes)
4236 result = encode_coding_sjis_big5 (coding, source, destination, 4054 coding->result = CODING_FINISH_NORMAL;
4237 src_bytes, dst_bytes, 0); 4055
4238 break; 4056 if (coding->mode & CODING_MODE_LAST_BLOCK)
4239 4057 {
4240 case coding_type_ccl: 4058 unsigned char *src = source + coding->consumed;
4241 result = ccl_coding_driver (coding, source, destination, 4059 unsigned char *src_end = src + src_bytes;
4242 src_bytes, dst_bytes, 1); 4060 unsigned char *dst = destination + coding->produced;
4243 break; 4061
4244 4062 if (coding->type == coding_type_iso2022)
4245 default: /* i.e. case coding_type_no_conversion: */ 4063 ENCODE_RESET_PLANE_AND_REGISTER;
4246 label_no_conversion: 4064 if (COMPOSING_P (coding))
4247 if (dst_bytes && src_bytes > dst_bytes) 4065 *dst++ = ISO_CODE_ESC, *dst++ = '1';
4248 { 4066 if (coding->consumed < src_bytes)
4249 coding->produced = dst_bytes; 4067 {
4250 result = CODING_FINISH_INSUFFICIENT_DST; 4068 int len = src_bytes - coding->consumed;
4251 } 4069
4252 else 4070 BCOPY_SHORT (source + coding->consumed, dst, len);
4253 { 4071 if (coding->src_multibyte)
4254 coding->produced = src_bytes; 4072 len = str_as_unibyte (dst, len);
4255 result = CODING_FINISH_NORMAL; 4073 dst += len;
4256 } 4074 coding->consumed = src_bytes;
4257 if (dst_bytes) 4075 }
4258 bcopy (source, destination, coding->produced); 4076 coding->produced = coding->produced_char = dst - destination;
4259 else 4077 }
4260 safe_bcopy (source, destination, coding->produced); 4078
4261 if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) 4079 return coding->result;
4262 {
4263 unsigned char *p = destination, *pend = p + coding->produced;
4264 while (p < pend)
4265 if (*p++ == '\015') p[-1] = '\n';
4266 }
4267 coding->fake_multibyte = 1;
4268 coding->consumed
4269 = coding->consumed_char = coding->produced_char = coding->produced;
4270 break;
4271 }
4272
4273 return result;
4274 } 4080 }
4275 4081
4276 /* Scan text in the region between *BEG and *END (byte positions), 4082 /* Scan text in the region between *BEG and *END (byte positions),
4277 skip characters which we don't have to decode by coding system 4083 skip characters which we don't have to decode by coding system
4278 CODING at the head and tail, then set *BEG and *END to the region 4084 CODING at the head and tail, then set *BEG and *END to the region
4279 of the text we actually have to convert. The caller should move 4085 of the text we actually have to convert. The caller should move
4280 the gap out of the region in advance. 4086 the gap out of the region in advance if the region is from a
4087 buffer.
4281 4088
4282 If STR is not NULL, *BEG and *END are indices into STR. */ 4089 If STR is not NULL, *BEG and *END are indices into STR. */
4283 4090
4284 static void 4091 static void
4285 shrink_decoding_region (beg, end, coding, str) 4092 shrink_decoding_region (beg, end, coding, str)
4291 int eol_conversion; 4098 int eol_conversion;
4292 Lisp_Object translation_table; 4099 Lisp_Object translation_table;
4293 4100
4294 if (coding->type == coding_type_ccl 4101 if (coding->type == coding_type_ccl
4295 || coding->type == coding_type_undecided 4102 || coding->type == coding_type_undecided
4296 || !NILP (coding->post_read_conversion)) 4103 || coding->eol_type != CODING_EOL_LF
4104 || !NILP (coding->post_read_conversion)
4105 || coding->composing != COMPOSITION_DISABLED)
4297 { 4106 {
4298 /* We can't skip any data. */ 4107 /* We can't skip any data. */
4299 return; 4108 return;
4300 } 4109 }
4301 else if (coding->type == coding_type_no_conversion) 4110 if (coding->type == coding_type_no_conversion
4111 || coding->type == coding_type_raw_text
4112 || coding->type == coding_type_emacs_mule)
4302 { 4113 {
4303 /* We need no conversion, but don't have to skip any data here. 4114 /* We need no conversion, but don't have to skip any data here.
4304 Decoding routine handles them effectively anyway. */ 4115 Decoding routine handles them effectively anyway. */
4305 return; 4116 return;
4306 } 4117 }
4318 /* Some ASCII character should be translated. We give up 4129 /* Some ASCII character should be translated. We give up
4319 shrinking. */ 4130 shrinking. */
4320 return; 4131 return;
4321 } 4132 }
4322 4133
4323 eol_conversion = (coding->eol_type != CODING_EOL_LF); 4134 if (coding->heading_ascii >= 0)
4324
4325 if ((! eol_conversion) && (coding->heading_ascii >= 0))
4326 /* Detection routine has already found how much we can skip at the 4135 /* Detection routine has already found how much we can skip at the
4327 head. */ 4136 head. */
4328 *beg += coding->heading_ascii; 4137 *beg += coding->heading_ascii;
4329 4138
4330 if (str) 4139 if (str)
4338 endp_orig = endp = begp + *end - *beg; 4147 endp_orig = endp = begp + *end - *beg;
4339 } 4148 }
4340 4149
4341 switch (coding->type) 4150 switch (coding->type)
4342 { 4151 {
4343 case coding_type_emacs_mule:
4344 case coding_type_raw_text:
4345 if (eol_conversion)
4346 {
4347 if (coding->heading_ascii < 0)
4348 while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
4349 while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
4350 endp--;
4351 /* Do not consider LF as ascii if preceded by CR, since that
4352 confuses eol decoding. */
4353 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4354 endp++;
4355 }
4356 else
4357 begp = endp;
4358 break;
4359
4360 case coding_type_sjis: 4152 case coding_type_sjis:
4361 case coding_type_big5: 4153 case coding_type_big5:
4362 /* We can skip all ASCII characters at the head. */ 4154 /* We can skip all ASCII characters at the head. */
4363 if (coding->heading_ascii < 0) 4155 if (coding->heading_ascii < 0)
4364 { 4156 {
4379 endp++; 4171 endp++;
4380 if (begp < endp && endp < endp_orig && endp[-1] >= 0x80) 4172 if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
4381 endp++; 4173 endp++;
4382 break; 4174 break;
4383 4175
4384 default: /* i.e. case coding_type_iso2022: */ 4176 case coding_type_iso2022:
4385 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) 4177 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4386 /* We can't skip any data. */ 4178 /* We can't skip any data. */
4387 break; 4179 break;
4388 if (coding->heading_ascii < 0) 4180 if (coding->heading_ascii < 0)
4389 { 4181 {
4450 } 4242 }
4451 else if (eight_bit) 4243 else if (eight_bit)
4452 endp = eight_bit; 4244 endp = eight_bit;
4453 } 4245 }
4454 } 4246 }
4247 break;
4248
4249 default:
4250 abort ();
4455 } 4251 }
4456 *beg += begp - begp_orig; 4252 *beg += begp - begp_orig;
4457 *end += endp - endp_orig; 4253 *end += endp - endp_orig;
4458 return; 4254 return;
4459 } 4255 }
4468 { 4264 {
4469 unsigned char *begp_orig, *begp, *endp_orig, *endp; 4265 unsigned char *begp_orig, *begp, *endp_orig, *endp;
4470 int eol_conversion; 4266 int eol_conversion;
4471 Lisp_Object translation_table; 4267 Lisp_Object translation_table;
4472 4268
4473 if (coding->type == coding_type_ccl) 4269 if (coding->type == coding_type_ccl
4474 /* We can't skip any data. */ 4270 || coding->eol_type == CODING_EOL_CRLF
4475 return; 4271 || coding->eol_type == CODING_EOL_CR
4476 else if (coding->type == coding_type_no_conversion) 4272 || coding->cmp_data && coding->cmp_data->used > 0)
4477 { 4273 {
4478 /* We need no conversion. */ 4274 /* We can't skip any data. */
4479 *beg = *end; 4275 return;
4276 }
4277 if (coding->type == coding_type_no_conversion
4278 || coding->type == coding_type_raw_text
4279 || coding->type == coding_type_emacs_mule
4280 || coding->type == coding_type_undecided)
4281 {
4282 /* We need no conversion, but don't have to skip any data here.
4283 Encoding routine handles them effectively anyway. */
4480 return; 4284 return;
4481 } 4285 }
4482 4286
4483 translation_table = coding->translation_table_for_encode; 4287 translation_table = coding->translation_table_for_encode;
4484 if (NILP (translation_table) && !NILP (Venable_character_translation)) 4288 if (NILP (translation_table) && !NILP (Venable_character_translation))
4511 4315
4512 /* Here, we don't have to check coding->pre_write_conversion because 4316 /* Here, we don't have to check coding->pre_write_conversion because
4513 the caller is expected to have handled it already. */ 4317 the caller is expected to have handled it already. */
4514 switch (coding->type) 4318 switch (coding->type)
4515 { 4319 {
4516 case coding_type_undecided:
4517 case coding_type_emacs_mule:
4518 case coding_type_raw_text:
4519 if (eol_conversion)
4520 {
4521 while (begp < endp && *begp != '\n') begp++;
4522 while (begp < endp && endp[-1] != '\n') endp--;
4523 }
4524 else
4525 begp = endp;
4526 break;
4527
4528 case coding_type_iso2022: 4320 case coding_type_iso2022:
4529 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) 4321 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
4530 /* We can't skip any data. */ 4322 /* We can't skip any data. */
4531 break; 4323 break;
4532 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL) 4324 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
4541 begp = bol; 4333 begp = bol;
4542 goto label_skip_tail; 4334 goto label_skip_tail;
4543 } 4335 }
4544 /* fall down ... */ 4336 /* fall down ... */
4545 4337
4546 default: 4338 case coding_type_sjis:
4339 case coding_type_big5:
4547 /* We can skip all ASCII characters at the head and tail. */ 4340 /* We can skip all ASCII characters at the head and tail. */
4548 if (eol_conversion) 4341 if (eol_conversion)
4549 while (begp < endp && *begp < 0x80 && *begp != '\n') begp++; 4342 while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
4550 else 4343 else
4551 while (begp < endp && *begp < 0x80) begp++; 4344 while (begp < endp && *begp < 0x80) begp++;
4553 if (eol_conversion) 4346 if (eol_conversion)
4554 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--; 4347 while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
4555 else 4348 else
4556 while (begp < endp && *(endp - 1) < 0x80) endp--; 4349 while (begp < endp && *(endp - 1) < 0x80) endp--;
4557 break; 4350 break;
4351
4352 default:
4353 abort ();
4558 } 4354 }
4559 4355
4560 *beg += begp - begp_orig; 4356 *beg += begp - begp_orig;
4561 *end += endp - endp_orig; 4357 *end += endp - endp_orig;
4562 return; 4358 return;
4717 characters (and bytes) are recorded in members of the structure 4513 characters (and bytes) are recorded in members of the structure
4718 CODING. 4514 CODING.
4719 4515
4720 If REPLACE is nonzero, we do various things as if the original text 4516 If REPLACE is nonzero, we do various things as if the original text
4721 is deleted and a new text is inserted. See the comments in 4517 is deleted and a new text is inserted. See the comments in
4722 replace_range (insdel.c) to know what we are doing. */ 4518 replace_range (insdel.c) to know what we are doing.
4519
4520 If REPLACE is zero, it is assumed that the source text is unibyte.
4521 Otherwise, it is assumed that the source text is multibyte. */
4723 4522
4724 int 4523 int
4725 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) 4524 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4726 int from, from_byte, to, to_byte, encodep, replace; 4525 int from, from_byte, to, to_byte, encodep, replace;
4727 struct coding_system *coding; 4526 struct coding_system *coding;
4728 { 4527 {
4729 int len = to - from, len_byte = to_byte - from_byte; 4528 int len = to - from, len_byte = to_byte - from_byte;
4730 int require, inserted, inserted_byte; 4529 int require, inserted, inserted_byte;
4731 int head_skip, tail_skip, total_skip = 0; 4530 int head_skip, tail_skip, total_skip = 0;
4732 Lisp_Object saved_coding_symbol; 4531 Lisp_Object saved_coding_symbol;
4733 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
4734 int first = 1; 4532 int first = 1;
4735 int fake_multibyte = 0;
4736 unsigned char *src, *dst; 4533 unsigned char *src, *dst;
4737 Lisp_Object deletion; 4534 Lisp_Object deletion;
4738 int orig_point = PT, orig_len = len; 4535 int orig_point = PT, orig_len = len;
4739 int prev_Z; 4536 int prev_Z;
4537 int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
4538
4539 coding->src_multibyte = replace && multibyte_p;
4540 coding->dst_multibyte = multibyte_p;
4740 4541
4741 deletion = Qnil; 4542 deletion = Qnil;
4742 saved_coding_symbol = Qnil; 4543 saved_coding_symbol = Qnil;
4743 4544
4744 if (from < PT && PT < to) 4545 if (from < PT && PT < to)
4753 4554
4754 prepare_to_modify_buffer (from, to, &from); 4555 prepare_to_modify_buffer (from, to, &from);
4755 if (saved_from != from) 4556 if (saved_from != from)
4756 { 4557 {
4757 to = from + len; 4558 to = from + len;
4758 if (multibyte) 4559 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4759 from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
4760 else
4761 from_byte = from, to_byte = to;
4762 len_byte = to_byte - from_byte; 4560 len_byte = to_byte - from_byte;
4763 } 4561 }
4764 } 4562 }
4765 4563
4766 if (! encodep && CODING_REQUIRE_DETECTION (coding)) 4564 if (! encodep && CODING_REQUIRE_DETECTION (coding))
4789 encounter an inconsitent eol format while decoding. */ 4587 encounter an inconsitent eol format while decoding. */
4790 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; 4588 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4791 } 4589 }
4792 } 4590 }
4793 4591
4794 if (encodep
4795 ? ! CODING_REQUIRE_ENCODING (coding)
4796 : ! CODING_REQUIRE_DECODING (coding))
4797 {
4798 coding->consumed_char = len;
4799 coding->consumed = len_byte;
4800 coding->produced = len_byte;
4801 if (multibyte
4802 && ! replace
4803 /* See the comment of the member heading_ascii in coding.h. */
4804 && coding->heading_ascii < len_byte)
4805 {
4806 /* We still may have to combine byte at the head and the
4807 tail of the text in the region. */
4808 if (from < GPT && GPT < to)
4809 move_gap_both (to, to_byte);
4810 len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4811 adjust_after_insert (from, from_byte, to, to_byte, len);
4812 coding->produced_char = len;
4813 }
4814 else
4815 {
4816 if (!replace)
4817 adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4818 coding->produced_char = len_byte;
4819 }
4820 return 0;
4821 }
4822
4823 /* Now we convert the text. */ 4592 /* Now we convert the text. */
4824 4593
4825 /* For encoding, we must process pre-write-conversion in advance. */ 4594 /* For encoding, we must process pre-write-conversion in advance. */
4826 if (encodep 4595 if (! inhibit_pre_post_conversion
4827 && ! NILP (coding->pre_write_conversion) 4596 && encodep
4828 && SYMBOLP (coding->pre_write_conversion) 4597 && SYMBOLP (coding->pre_write_conversion)
4829 && ! NILP (Ffboundp (coding->pre_write_conversion))) 4598 && ! NILP (Ffboundp (coding->pre_write_conversion)))
4830 { 4599 {
4831 /* The function in pre-write-conversion may put a new text in a 4600 /* The function in pre-write-conversion may put a new text in a
4832 new buffer. */ 4601 new buffer. */
4857 orig_point += len - orig_len; 4626 orig_point += len - orig_len;
4858 else if (orig_point > from) 4627 else if (orig_point > from)
4859 orig_point = from; 4628 orig_point = from;
4860 orig_len = len; 4629 orig_len = len;
4861 to = from + len; 4630 to = from + len;
4862 from_byte = multibyte ? CHAR_TO_BYTE (from) : from_byte; 4631 from_byte = CHAR_TO_BYTE (from);
4863 to_byte = multibyte ? CHAR_TO_BYTE (to) : to; 4632 to_byte = CHAR_TO_BYTE (to);
4864 len_byte = to_byte - from_byte; 4633 len_byte = to_byte - from_byte;
4865 TEMP_SET_PT_BOTH (from, from_byte); 4634 TEMP_SET_PT_BOTH (from, from_byte);
4866 } 4635 }
4867 } 4636 }
4868 4637
4875 coding_save_composition (coding, from, to, Fcurrent_buffer ()); 4644 coding_save_composition (coding, from, to, Fcurrent_buffer ());
4876 else 4645 else
4877 coding_allocate_composition_data (coding, from); 4646 coding_allocate_composition_data (coding, from);
4878 } 4647 }
4879 4648
4880 /* For conversion by CCL program and for encoding with composition 4649 /* Try to skip the heading and tailing ASCIIs. */
4881 handling, we can't skip any character because we may convert or 4650 {
4882 compose even ASCII characters. */ 4651 int from_byte_orig = from_byte, to_byte_orig = to_byte;
4883 if (coding->type != coding_type_ccl 4652
4884 && (!encodep || coding->cmp_data == NULL)) 4653 if (from < GPT && GPT < to)
4885 { 4654 move_gap_both (from, from_byte);
4886 /* Try to skip the heading and tailing ASCIIs. */ 4655 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
4887 int from_byte_orig = from_byte, to_byte_orig = to_byte; 4656 if (from_byte == to_byte
4888 4657 && (encodep || NILP (coding->post_read_conversion))
4889 if (from < GPT && GPT < to) 4658 && ! CODING_REQUIRE_FLUSHING (coding))
4890 move_gap_both (from, from_byte); 4659 {
4891 SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); 4660 coding->produced = len_byte;
4892 if (from_byte == to_byte 4661 coding->produced_char = len;
4893 && (encodep || NILP (coding->post_read_conversion)) 4662 if (!replace)
4894 && ! CODING_REQUIRE_FLUSHING (coding)) 4663 /* We must record and adjust for this new text now. */
4895 { 4664 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4896 coding->produced = len_byte; 4665 return 0;
4897 coding->produced_char = multibyte ? len : len_byte; 4666 }
4898 if (!replace) 4667
4899 /* We must record and adjust for this new text now. */ 4668 head_skip = from_byte - from_byte_orig;
4900 adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); 4669 tail_skip = to_byte_orig - to_byte;
4901 return 0; 4670 total_skip = head_skip + tail_skip;
4902 } 4671 from += head_skip;
4903 4672 to -= tail_skip;
4904 head_skip = from_byte - from_byte_orig; 4673 len -= total_skip; len_byte -= total_skip;
4905 tail_skip = to_byte_orig - to_byte; 4674 }
4906 total_skip = head_skip + tail_skip;
4907 from += head_skip;
4908 to -= tail_skip;
4909 len -= total_skip; len_byte -= total_skip;
4910
4911 if (coding->cmp_data)
4912 coding->cmp_data->char_offset = from;
4913 }
4914 4675
4915 /* The code conversion routine can not preserve text properties for 4676 /* The code conversion routine can not preserve text properties for
4916 now. So, we must remove all text properties in the region. 4677 now. So, we must remove all text properties in the region.
4917 Here, we must suppress all modification hooks. */ 4678 Here, we must suppress all modification hooks. */
4918 if (replace) 4679 if (replace)
4945 if (GPT - BEG < BEG_UNCHANGED) 4706 if (GPT - BEG < BEG_UNCHANGED)
4946 BEG_UNCHANGED = GPT - BEG; 4707 BEG_UNCHANGED = GPT - BEG;
4947 if (Z - GPT < END_UNCHANGED) 4708 if (Z - GPT < END_UNCHANGED)
4948 END_UNCHANGED = Z - GPT; 4709 END_UNCHANGED = Z - GPT;
4949 4710
4711 if (!encodep && coding->src_multibyte)
4712 {
4713 /* Decoding routines expect that the source text is unibyte.
4714 We must convert 8-bit characters of multibyte form to
4715 unibyte. */
4716 int len_byte_orig = len_byte;
4717 len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
4718 if (len_byte < len_byte_orig)
4719 safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
4720 len_byte);
4721 coding->src_multibyte = 0;
4722 }
4723
4950 for (;;) 4724 for (;;)
4951 { 4725 {
4952 int result; 4726 int result;
4953 4727
4954 /* The buffer memory is now: 4728 /* The buffer memory is now:
4955 +--------+converted-text+---------+-------original-text------+---+ 4729 +--------+converted-text+---------+-------original-text-------+---+
4956 |<-from->|<--inserted-->|---------|<-----------len---------->|---| 4730 |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
4957 |<------------------- GAP_SIZE -------------------->| */ 4731 |<---------------------- GAP ----------------------->| */
4958 src = GAP_END_ADDR - len_byte; 4732 src = GAP_END_ADDR - len_byte;
4959 dst = GPT_ADDR + inserted_byte; 4733 dst = GPT_ADDR + inserted_byte;
4960 4734
4961 if (encodep) 4735 if (encodep)
4962 result = encode_coding (coding, src, dst, len_byte, 0); 4736 result = encode_coding (coding, src, dst, len_byte, 0);
4963 else 4737 else
4964 result = decode_coding (coding, src, dst, len_byte, 0); 4738 result = decode_coding (coding, src, dst, len_byte, 0);
4965 4739
4966 /* The buffer memory is now: 4740 /* The buffer memory is now:
4967 +--------+-------converted-text--------+--+---original-text--+---+ 4741 +--------+-------converted-text----+--+------original-text----+---+
4968 |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| 4742 |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
4969 |<------------------- GAP_SIZE -------------------->| */ 4743 |<---------------------- GAP ----------------------->| */
4970 4744
4971 if (coding->fake_multibyte)
4972 fake_multibyte = 1;
4973
4974 if (!encodep && !multibyte)
4975 coding->produced_char = coding->produced;
4976 inserted += coding->produced_char; 4745 inserted += coding->produced_char;
4977 inserted_byte += coding->produced; 4746 inserted_byte += coding->produced;
4978 len_byte -= coding->consumed; 4747 len_byte -= coding->consumed;
4979 4748
4980 if (result == CODING_FINISH_INSUFFICIENT_CMP) 4749 if (result == CODING_FINISH_INSUFFICIENT_CMP)
5066 make them valid buffer contents, and finish conversion. */ 4835 make them valid buffer contents, and finish conversion. */
5067 inserted += len_byte; 4836 inserted += len_byte;
5068 inserted_byte += len_byte; 4837 inserted_byte += len_byte;
5069 while (len_byte--) 4838 while (len_byte--)
5070 *dst++ = *src++; 4839 *dst++ = *src++;
5071 fake_multibyte = 1;
5072 break; 4840 break;
5073 } 4841 }
5074 if (result == CODING_FINISH_INTERRUPT) 4842 if (result == CODING_FINISH_INTERRUPT)
5075 { 4843 {
5076 /* The conversion procedure was interrupted by a user. */ 4844 /* The conversion procedure was interrupted by a user. */
5077 fake_multibyte = 1;
5078 break; 4845 break;
5079 } 4846 }
5080 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */ 4847 /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
5081 if (coding->consumed < 1) 4848 if (coding->consumed < 1)
5082 { 4849 {
5083 /* It's quite strange to require more memory without 4850 /* It's quite strange to require more memory without
5084 consuming any bytes. Perhaps CCL program bug. */ 4851 consuming any bytes. Perhaps CCL program bug. */
5085 fake_multibyte = 1;
5086 break; 4852 break;
5087 } 4853 }
5088 if (first) 4854 if (first)
5089 { 4855 {
5090 /* We have just done the first batch of conversion which was 4856 /* We have just done the first batch of conversion which was
5116 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; 4882 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5117 } 4883 }
5118 } 4884 }
5119 if (src - dst > 0) *dst = 0; /* Put an anchor. */ 4885 if (src - dst > 0) *dst = 0; /* Put an anchor. */
5120 4886
5121 if (multibyte 4887 if (encodep && coding->dst_multibyte)
5122 && (encodep 4888 {
5123 || fake_multibyte 4889 /* The output is unibyte. We must convert 8-bit characters to
5124 || (to - from) != (to_byte - from_byte))) 4890 multibyte form. */
5125 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); 4891 if (inserted_byte * 2 > GAP_SIZE)
4892 {
4893 GAP_SIZE -= inserted_byte;
4894 ZV += inserted_byte; Z += inserted_byte;
4895 ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
4896 GPT += inserted_byte; GPT_BYTE += inserted_byte;
4897 make_gap (inserted_byte - GAP_SIZE);
4898 GAP_SIZE += inserted_byte;
4899 ZV -= inserted_byte; Z -= inserted_byte;
4900 ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
4901 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4902 }
4903 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
4904 }
5126 4905
5127 /* If we have shrinked the conversion area, adjust it now. */ 4906 /* If we have shrinked the conversion area, adjust it now. */
5128 if (total_skip > 0) 4907 if (total_skip > 0)
5129 { 4908 {
5130 if (tail_skip > 0) 4909 if (tail_skip > 0)
5144 4923
5145 if (!encodep && coding->cmp_data && coding->cmp_data->used) 4924 if (!encodep && coding->cmp_data && coding->cmp_data->used)
5146 coding_restore_composition (coding, Fcurrent_buffer ()); 4925 coding_restore_composition (coding, Fcurrent_buffer ());
5147 coding_free_composition_data (coding); 4926 coding_free_composition_data (coding);
5148 4927
5149 if (! encodep && ! NILP (coding->post_read_conversion)) 4928 if (! inhibit_pre_post_conversion
4929 && ! encodep && ! NILP (coding->post_read_conversion))
5150 { 4930 {
5151 Lisp_Object val; 4931 Lisp_Object val;
5152 int count = specpdl_ptr - specpdl; 4932 int count = specpdl_ptr - specpdl;
5153 4933
5154 if (from != PT) 4934 if (from != PT)
5190 4970
5191 return 0; 4971 return 0;
5192 } 4972 }
5193 4973
5194 Lisp_Object 4974 Lisp_Object
5195 code_convert_string (str, coding, encodep, nocopy) 4975 run_pre_post_conversion_on_str (str, coding, encodep)
5196 Lisp_Object str; 4976 Lisp_Object str;
5197 struct coding_system *coding; 4977 struct coding_system *coding;
5198 int encodep, nocopy; 4978 int encodep;
4979 {
4980 int count = specpdl_ptr - specpdl;
4981 struct gcpro gcpro1;
4982 struct buffer *prev = current_buffer;
4983 int multibyte = STRING_MULTIBYTE (str);
4984
4985 record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4986 record_unwind_protect (code_convert_region_unwind, Qnil);
4987 GCPRO1 (str);
4988 temp_output_buffer_setup (" *code-converting-work*");
4989 set_buffer_internal (XBUFFER (Vstandard_output));
4990 /* We must insert the contents of STR as is without
4991 unibyte<->multibyte conversion. For that, we adjust the
4992 multibyteness of the working buffer to that of STR. */
4993 Ferase_buffer ();
4994 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
4995 insert_from_string (str, 0, 0,
4996 XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
4997 UNGCPRO;
4998 inhibit_pre_post_conversion = 1;
4999 if (encodep)
5000 call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5001 else
5002 call1 (coding->post_read_conversion, make_number (Z - BEG));
5003 inhibit_pre_post_conversion = 0;
5004 str = make_buffer_string (BEG, Z, 0);
5005 return unbind_to (count, str);
5006 }
5007
5008 Lisp_Object
5009 decode_coding_string (str, coding, nocopy)
5010 Lisp_Object str;
5011 struct coding_system *coding;
5012 int nocopy;
5199 { 5013 {
5200 int len; 5014 int len;
5201 char *buf; 5015 char *buf;
5202 int from = 0, to = XSTRING (str)->size; 5016 int from, to, to_byte;
5203 int to_byte = STRING_BYTES (XSTRING (str));
5204 struct gcpro gcpro1; 5017 struct gcpro gcpro1;
5205 Lisp_Object saved_coding_symbol; 5018 Lisp_Object saved_coding_symbol;
5206 int result; 5019 int result;
5207 5020
5021 from = 0;
5022 to = XSTRING (str)->size;
5023 to_byte = STRING_BYTES (XSTRING (str));
5024
5208 saved_coding_symbol = Qnil; 5025 saved_coding_symbol = Qnil;
5209 if ((encodep && !NILP (coding->pre_write_conversion) 5026 if (CODING_REQUIRE_DETECTION (coding))
5210 || !encodep && !NILP (coding->post_read_conversion)))
5211 {
5212 /* Since we have to call Lisp functions which assume target text
5213 is in a buffer, after setting a temporary buffer, call
5214 code_convert_region. */
5215 int count = specpdl_ptr - specpdl;
5216 struct buffer *prev = current_buffer;
5217 int multibyte = STRING_MULTIBYTE (str);
5218
5219 record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5220 record_unwind_protect (code_convert_region_unwind, Qnil);
5221 inhibit_pre_post_conversion = 1;
5222 GCPRO1 (str);
5223 temp_output_buffer_setup (" *code-converting-work*");
5224 set_buffer_internal (XBUFFER (Vstandard_output));
5225 /* We must insert the contents of STR as is without
5226 unibyte<->multibyte conversion. For that, we adjust the
5227 multibyteness of the working buffer to that of STR. */
5228 Ferase_buffer (); /* for safety */
5229 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
5230 insert_from_string (str, 0, 0, to, to_byte, 0);
5231 UNGCPRO;
5232 code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
5233 /* Make a unibyte string if we are encoding, otherwise make a
5234 multibyte string. */
5235 Fset_buffer_multibyte (encodep ? Qnil : Qt);
5236 str = make_buffer_string (BEGV, ZV, 0);
5237 return unbind_to (count, str);
5238 }
5239
5240 if (! encodep && CODING_REQUIRE_DETECTION (coding))
5241 { 5027 {
5242 /* See the comments in code_convert_region. */ 5028 /* See the comments in code_convert_region. */
5243 if (coding->type == coding_type_undecided) 5029 if (coding->type == coding_type_undecided)
5244 { 5030 {
5245 detect_coding (coding, XSTRING (str)->data, to_byte); 5031 detect_coding (coding, XSTRING (str)->data, to_byte);
5256 encounter an inconsitent eol format while decoding. */ 5042 encounter an inconsitent eol format while decoding. */
5257 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; 5043 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5258 } 5044 }
5259 } 5045 }
5260 5046
5261 if (encodep 5047 if (! CODING_REQUIRE_DECODING (coding))
5262 ? ! CODING_REQUIRE_ENCODING (coding) 5048 {
5263 : ! CODING_REQUIRE_DECODING (coding)) 5049 if (!STRING_MULTIBYTE (str))
5264 return (nocopy ? str : Fcopy_sequence (str)); 5050 {
5051 str = Fstring_as_multibyte (str);
5052 nocopy = 1;
5053 }
5054 return (nocopy ? str : Fcopy_sequence (str));
5055 }
5056
5057 if (STRING_MULTIBYTE (str))
5058 {
5059 /* Decoding routines expect the source text to be unibyte. */
5060 str = Fstring_as_unibyte (str);
5061 nocopy = 1;
5062 coding->src_multibyte = 0;
5063 }
5064 coding->dst_multibyte = 1;
5265 5065
5266 if (coding->composing != COMPOSITION_DISABLED) 5066 if (coding->composing != COMPOSITION_DISABLED)
5267 { 5067 coding_allocate_composition_data (coding, from);
5268 if (encodep) 5068
5269 coding_save_composition (coding, from, to, str); 5069 /* Try to skip the heading and tailing ASCIIs. */
5270 else 5070 {
5271 coding_allocate_composition_data (coding, from); 5071 int from_orig = from;
5272 } 5072
5273 5073 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5274 /* For conversion by CCL program and for encoding with composition 5074 0);
5275 handling, we can't skip any character because we may convert or 5075 if (from == to_byte)
5276 compose even ASCII characters. */ 5076 return (nocopy ? str : Fcopy_sequence (str));
5277 if (coding->type != coding_type_ccl 5077 }
5278 && (!encodep || coding->cmp_data == NULL)) 5078
5279 { 5079 len = decoding_buffer_size (coding, to_byte - from);
5280 /* Try to skip the heading and tailing ASCIIs. */
5281 int from_orig = from;
5282
5283 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5284 encodep);
5285 if (from == to_byte)
5286 return (nocopy ? str : Fcopy_sequence (str));
5287
5288 if (coding->cmp_data)
5289 coding->cmp_data->char_offset = from;
5290 }
5291
5292 if (encodep)
5293 len = encoding_buffer_size (coding, to_byte - from);
5294 else
5295 len = decoding_buffer_size (coding, to_byte - from);
5296 len += from + STRING_BYTES (XSTRING (str)) - to_byte; 5080 len += from + STRING_BYTES (XSTRING (str)) - to_byte;
5297 GCPRO1 (str); 5081 GCPRO1 (str);
5298 buf = get_conversion_buffer (len); 5082 buf = get_conversion_buffer (len);
5299 UNGCPRO; 5083 UNGCPRO;
5300 5084
5301 if (from > 0) 5085 if (from > 0)
5302 bcopy (XSTRING (str)->data, buf, from); 5086 bcopy (XSTRING (str)->data, buf, from);
5303 result = (encodep 5087 result = decode_coding (coding, XSTRING (str)->data + from,
5304 ? encode_coding (coding, XSTRING (str)->data + from, 5088 buf + from, to_byte - from, len);
5305 buf + from, to_byte - from, len) 5089 if (result == CODING_FINISH_INCONSISTENT_EOL)
5306 : decode_coding (coding, XSTRING (str)->data + from,
5307 buf + from, to_byte - from, len));
5308 if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5309 { 5090 {
5310 /* We simply try to decode the whole string again but without 5091 /* We simply try to decode the whole string again but without
5311 eol-conversion this time. */ 5092 eol-conversion this time. */
5312 coding->eol_type = CODING_EOL_LF; 5093 coding->eol_type = CODING_EOL_LF;
5313 coding->symbol = saved_coding_symbol; 5094 coding->symbol = saved_coding_symbol;
5314 coding_free_composition_data (coding); 5095 coding_free_composition_data (coding);
5315 return code_convert_string (str, coding, encodep, nocopy); 5096 return decode_coding_string (str, coding, nocopy);
5316 } 5097 }
5317 5098
5318 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced, 5099 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
5319 STRING_BYTES (XSTRING (str)) - to_byte); 5100 STRING_BYTES (XSTRING (str)) - to_byte);
5320 5101
5321 len = from + STRING_BYTES (XSTRING (str)) - to_byte; 5102 len = from + STRING_BYTES (XSTRING (str)) - to_byte;
5322 if (encodep) 5103 str = make_multibyte_string (buf, len + coding->produced_char,
5323 str = make_unibyte_string (buf, len + coding->produced); 5104 len + coding->produced);
5324 else 5105
5325 { 5106 if (coding->cmp_data && coding->cmp_data->used)
5326 int chars= (coding->fake_multibyte
5327 ? multibyte_chars_in_text (buf + from, coding->produced)
5328 : coding->produced_char);
5329 str = make_multibyte_string (buf, len + chars, len + coding->produced);
5330 }
5331
5332 if (!encodep && coding->cmp_data && coding->cmp_data->used)
5333 coding_restore_composition (coding, str); 5107 coding_restore_composition (coding, str);
5334
5335 coding_free_composition_data (coding); 5108 coding_free_composition_data (coding);
5109
5110 if (SYMBOLP (coding->post_read_conversion)
5111 && !NILP (Ffboundp (coding->post_read_conversion)))
5112 str = run_pre_post_conversion_on_str (str, 0);
5113
5114 return str;
5115 }
5116
5117 Lisp_Object
5118 encode_coding_string (str, coding, nocopy)
5119 Lisp_Object str;
5120 struct coding_system *coding;
5121 int nocopy;
5122 {
5123 int len;
5124 char *buf;
5125 int from, to, to_byte;
5126 struct gcpro gcpro1;
5127 Lisp_Object saved_coding_symbol;
5128 int result;
5129
5130 if (SYMBOLP (coding->pre_write_conversion)
5131 && !NILP (Ffboundp (coding->pre_write_conversion)))
5132 str = run_pre_post_conversion_on_str (str, 1);
5133
5134 from = 0;
5135 to = XSTRING (str)->size;
5136 to_byte = STRING_BYTES (XSTRING (str));
5137
5138 saved_coding_symbol = Qnil;
5139 if (! CODING_REQUIRE_ENCODING (coding))
5140 {
5141 if (STRING_MULTIBYTE (str))
5142 {
5143 str = Fstring_as_unibyte (str);
5144 nocopy = 1;
5145 }
5146 return (nocopy ? str : Fcopy_sequence (str));
5147 }
5148
5149 /* Encoding routines determine the multibyteness of the source text
5150 by coding->src_multibyte. */
5151 coding->src_multibyte = STRING_MULTIBYTE (str);
5152 coding->dst_multibyte = 0;
5153
5154 if (coding->composing != COMPOSITION_DISABLED)
5155 coding_save_composition (coding, from, to, str);
5156
5157 /* Try to skip the heading and tailing ASCIIs. */
5158 {
5159 int from_orig = from;
5160
5161 SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
5162 1);
5163 if (from == to_byte)
5164 return (nocopy ? str : Fcopy_sequence (str));
5165 }
5166
5167 len = encoding_buffer_size (coding, to_byte - from);
5168 len += from + STRING_BYTES (XSTRING (str)) - to_byte;
5169 GCPRO1 (str);
5170 buf = get_conversion_buffer (len);
5171 UNGCPRO;
5172
5173 if (from > 0)
5174 bcopy (XSTRING (str)->data, buf, from);
5175 result = encode_coding (coding, XSTRING (str)->data + from,
5176 buf + from, to_byte - from, len);
5177 bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
5178 STRING_BYTES (XSTRING (str)) - to_byte);
5179
5180 len = from + STRING_BYTES (XSTRING (str)) - to_byte;
5181 str = make_unibyte_string (buf, len + coding->produced);
5182 coding_free_composition_data (coding);
5183
5336 return str; 5184 return str;
5337 } 5185 }
5338 5186
5339 5187
5340 #ifdef emacs 5188 #ifdef emacs
5541 5389
5542 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 5390 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5543 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); 5391 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5544 5392
5545 coding.mode |= CODING_MODE_LAST_BLOCK; 5393 coding.mode |= CODING_MODE_LAST_BLOCK;
5394 coding.src_multibyte = coding.dst_multibyte
5395 = !NILP (current_buffer->enable_multibyte_characters);
5546 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), 5396 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
5547 &coding, encodep, 1); 5397 &coding, encodep, 1);
5548 Vlast_coding_system_used = coding.symbol; 5398 Vlast_coding_system_used = coding.symbol;
5549 return make_number (coding.produced_char); 5399 return make_number (coding.produced_char);
5550 } 5400 }
5594 5444
5595 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 5445 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5596 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); 5446 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5597 5447
5598 coding.mode |= CODING_MODE_LAST_BLOCK; 5448 coding.mode |= CODING_MODE_LAST_BLOCK;
5599 string = code_convert_string (string, &coding, encodep, !NILP (nocopy)); 5449 string = (encodep
5450 ? encode_coding_string (string, &coding, !NILP (nocopy))
5451 : decode_coding_string (string, &coding, !NILP (nocopy)));
5600 Vlast_coding_system_used = coding.symbol; 5452 Vlast_coding_system_used = coding.symbol;
5601 5453
5602 return string; 5454 return string;
5603 } 5455 }
5604 5456
5652 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 5504 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
5653 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); 5505 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
5654 5506
5655 coding.composing = COMPOSITION_DISABLED; 5507 coding.composing = COMPOSITION_DISABLED;
5656 coding.mode |= CODING_MODE_LAST_BLOCK; 5508 coding.mode |= CODING_MODE_LAST_BLOCK;
5657 return code_convert_string (string, &coding, encodep, 1); 5509 return (encodep
5510 ? encode_coding_string (string, &coding, 1)
5511 : decode_coding_string (string, &coding, 1));
5658 } 5512 }
5659 5513
5660 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, 5514 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
5661 "Decode a Japanese character which has CODE in shift_jis encoding.\n\ 5515 "Decode a Japanese character which has CODE in shift_jis encoding.\n\
5662 Return the corresponding character.") 5516 Return the corresponding character.")
5671 if (s1 == 0) 5525 if (s1 == 0)
5672 { 5526 {
5673 if (s2 < 0x80) 5527 if (s2 < 0x80)
5674 XSETFASTINT (val, s2); 5528 XSETFASTINT (val, s2);
5675 else if (s2 >= 0xA0 || s2 <= 0xDF) 5529 else if (s2 >= 0xA0 || s2 <= 0xDF)
5676 XSETFASTINT (val, 5530 XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
5677 MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201, s2, 0));
5678 else 5531 else
5679 error ("Invalid Shift JIS code: %x", XFASTINT (code)); 5532 error ("Invalid Shift JIS code: %x", XFASTINT (code));
5680 } 5533 }
5681 else 5534 else
5682 { 5535 {
5683 if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF) 5536 if ((s1 < 0x80 || s1 > 0x9F && s1 < 0xE0 || s1 > 0xEF)
5684 || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)) 5537 || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
5685 error ("Invalid Shift JIS code: %x", XFASTINT (code)); 5538 error ("Invalid Shift JIS code: %x", XFASTINT (code));
5686 DECODE_SJIS (s1, s2, c1, c2); 5539 DECODE_SJIS (s1, s2, c1, c2);
5687 XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2)); 5540 XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
5688 } 5541 }
5689 return val; 5542 return val;
5690 } 5543 }
5691 5544
5692 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0, 5545 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
5742 { 5595 {
5743 if ((b1 < 0xA1 || b1 > 0xFE) 5596 if ((b1 < 0xA1 || b1 > 0xFE)
5744 || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)) 5597 || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
5745 error ("Invalid BIG5 code: %x", XFASTINT (code)); 5598 error ("Invalid BIG5 code: %x", XFASTINT (code));
5746 DECODE_BIG5 (b1, b2, charset, c1, c2); 5599 DECODE_BIG5 (b1, b2, charset, c1, c2);
5747 XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2)); 5600 XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
5748 } 5601 }
5749 return val; 5602 return val;
5750 } 5603 }
5751 5604
5752 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0, 5605 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
5787 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); 5640 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
5788 /* We had better not send unsafe characters to terminal. */ 5641 /* We had better not send unsafe characters to terminal. */
5789 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; 5642 terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
5790 /* Character composition should be disabled. */ 5643 /* Character composition should be disabled. */
5791 terminal_coding.composing = COMPOSITION_DISABLED; 5644 terminal_coding.composing = COMPOSITION_DISABLED;
5645 terminal_coding.src_multibyte = 1;
5646 terminal_coding.dst_multibyte = 0;
5792 return Qnil; 5647 return Qnil;
5793 } 5648 }
5794 5649
5795 DEFUN ("set-safe-terminal-coding-system-internal", 5650 DEFUN ("set-safe-terminal-coding-system-internal",
5796 Fset_safe_terminal_coding_system_internal, 5651 Fset_safe_terminal_coding_system_internal,
5801 CHECK_SYMBOL (coding_system, 0); 5656 CHECK_SYMBOL (coding_system, 0);
5802 setup_coding_system (Fcheck_coding_system (coding_system), 5657 setup_coding_system (Fcheck_coding_system (coding_system),
5803 &safe_terminal_coding); 5658 &safe_terminal_coding);
5804 /* Character composition should be disabled. */ 5659 /* Character composition should be disabled. */
5805 safe_terminal_coding.composing = COMPOSITION_DISABLED; 5660 safe_terminal_coding.composing = COMPOSITION_DISABLED;
5661 safe_terminal_coding.src_multibyte = 1;
5662 safe_terminal_coding.dst_multibyte = 0;
5806 return Qnil; 5663 return Qnil;
5807 } 5664 }
5808 5665
5809 DEFUN ("terminal-coding-system", 5666 DEFUN ("terminal-coding-system",
5810 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0, 5667 Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
6022 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4; 5879 emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
6023 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4; 5880 emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
6024 5881
6025 /* ISO2022 specific initialize routine. */ 5882 /* ISO2022 specific initialize routine. */
6026 for (i = 0; i < 0x20; i++) 5883 for (i = 0; i < 0x20; i++)
6027 iso_code_class[i] = ISO_control_code; 5884 iso_code_class[i] = ISO_control_0;
6028 for (i = 0x21; i < 0x7F; i++) 5885 for (i = 0x21; i < 0x7F; i++)
6029 iso_code_class[i] = ISO_graphic_plane_0; 5886 iso_code_class[i] = ISO_graphic_plane_0;
6030 for (i = 0x80; i < 0xA0; i++) 5887 for (i = 0x80; i < 0xA0; i++)
6031 iso_code_class[i] = ISO_control_code; 5888 iso_code_class[i] = ISO_control_1;
6032 for (i = 0xA1; i < 0xFF; i++) 5889 for (i = 0xA1; i < 0xFF; i++)
6033 iso_code_class[i] = ISO_graphic_plane_1; 5890 iso_code_class[i] = ISO_graphic_plane_1;
6034 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; 5891 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
6035 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF; 5892 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
6036 iso_code_class[ISO_CODE_CR] = ISO_carriage_return; 5893 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;