comparison src/coding.c @ 42104:d69c2368e549

(DECODE_COMPOSITION_END): Fixed a typo in the last patch (COMPOSING_P, not COMPOSING).
author Sam Steingold <sds@gnu.org>
date Mon, 17 Dec 2001 15:21:06 +0000
parents e5356ff675e3
children 09cc243e2d14
comparison
legend: equal | deleted | inserted | replaced
42103:e5356ff675e3 42104:d69c2368e549
58 Compound Text, various EUCs (Extended Unix Code), and coding 58 Compound Text, various EUCs (Extended Unix Code), and coding
59 systems used in Internet communication such as ISO-2022-JP are 59 systems used in Internet communication such as ISO-2022-JP are
60 all variants of ISO2022. Details are described in section 3. 60 all variants of ISO2022. Details are described in section 3.
61 61
62 2. SJIS (or Shift-JIS or MS-Kanji-Code) 62 2. SJIS (or Shift-JIS or MS-Kanji-Code)
63 63
64 A coding system to encode character sets: ASCII, JISX0201, and 64 A coding system to encode character sets: ASCII, JISX0201, and
65 JISX0208. Widely used for PC's in Japan. Details are described in 65 JISX0208. Widely used for PC's in Japan. Details are described in
66 section 4. 66 section 4.
67 67
68 3. BIG5 68 3. BIG5
501 Lisp_Object 501 Lisp_Object
502 coding_safe_chars (coding) 502 coding_safe_chars (coding)
503 struct coding_system *coding; 503 struct coding_system *coding;
504 { 504 {
505 Lisp_Object coding_spec, plist, safe_chars; 505 Lisp_Object coding_spec, plist, safe_chars;
506 506
507 coding_spec = Fget (coding->symbol, Qcoding_system); 507 coding_spec = Fget (coding->symbol, Qcoding_system);
508 plist = XVECTOR (coding_spec)->contents[3]; 508 plist = XVECTOR (coding_spec)->contents[3];
509 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); 509 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
510 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); 510 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
511 } 511 }
887 if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) 887 if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
888 { 888 {
889 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method); 889 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
890 for (i = 0; i < ncomponent; i++) 890 for (i = 0; i < ncomponent; i++)
891 CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]); 891 CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
892 CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars); 892 CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
893 if (buf < bufp) 893 if (buf < bufp)
894 { 894 {
895 unsigned char *p = buf; 895 unsigned char *p = buf;
896 EMIT_BYTES (p, bufp); 896 EMIT_BYTES (p, bufp);
897 *destination += bufp - buf; 897 *destination += bufp - buf;
1053 { \ 1053 { \
1054 coding->cmp_data = coding->cmp_data->next; \ 1054 coding->cmp_data = coding->cmp_data->next; \
1055 coding->cmp_data_start = 0; \ 1055 coding->cmp_data_start = 0; \
1056 } \ 1056 } \
1057 } while (0) 1057 } while (0)
1058 1058
1059 1059
1060 static void encode_eol P_ ((struct coding_system *, unsigned char *, 1060 static void encode_eol P_ ((struct coding_system *, unsigned char *,
1061 unsigned char *, int, int)); 1061 unsigned char *, int, int));
1062 1062
1063 static void 1063 static void
1431 /* Locking shift out. */ 1431 /* Locking shift out. */
1432 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; 1432 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1433 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; 1433 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1434 } 1434 }
1435 break; 1435 break;
1436 1436
1437 case ISO_CODE_SI: 1437 case ISO_CODE_SI:
1438 if (inhibit_iso_escape_detection) 1438 if (inhibit_iso_escape_detection)
1439 break; 1439 break;
1440 single_shifting = 0; 1440 single_shifting = 0;
1441 if (shift_out == 1) 1441 if (shift_out == 1)
1663 1663
1664 /* Handle composition end sequence ESC 1. */ 1664 /* Handle composition end sequence ESC 1. */
1665 1665
1666 #define DECODE_COMPOSITION_END(c1) \ 1666 #define DECODE_COMPOSITION_END(c1) \
1667 do { \ 1667 do { \
1668 if (! COMPOSING (coding)) \ 1668 if (! COMPOSING_P (coding)) \
1669 { \ 1669 { \
1670 *dst++ = ISO_CODE_ESC; \ 1670 *dst++ = ISO_CODE_ESC; \
1671 *dst++ = c1; \ 1671 *dst++ = c1; \
1672 coding->produced_char += 2; \ 1672 coding->produced_char += 2; \
1673 } \ 1673 } \
2437 while (found < 4) 2437 while (found < 4)
2438 { 2438 {
2439 ONE_MORE_CHAR (c); 2439 ONE_MORE_CHAR (c);
2440 if (c == '\n') 2440 if (c == '\n')
2441 break; 2441 break;
2442 2442
2443 charset = CHAR_CHARSET (c); 2443 charset = CHAR_CHARSET (c);
2444 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); 2444 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2445 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) 2445 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2446 { 2446 {
2447 found++; 2447 found++;
2607 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; 2607 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2608 else 2608 else
2609 *dst++ = ISO_CODE_CR; 2609 *dst++ = ISO_CODE_CR;
2610 CODING_SPEC_ISO_BOL (coding) = 1; 2610 CODING_SPEC_ISO_BOL (coding) = 1;
2611 } 2611 }
2612 else 2612 else
2613 { 2613 {
2614 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) 2614 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2615 ENCODE_RESET_PLANE_AND_REGISTER; 2615 ENCODE_RESET_PLANE_AND_REGISTER;
2616 *dst++ = c; 2616 *dst++ = c;
2617 } 2617 }
3030 { 3030 {
3031 int c, charset, c1, c2; 3031 int c, charset, c1, c2;
3032 3032
3033 src_base = src; 3033 src_base = src;
3034 ONE_MORE_CHAR (c); 3034 ONE_MORE_CHAR (c);
3035 3035
3036 /* Now encode the character C. */ 3036 /* Now encode the character C. */
3037 if (SINGLE_BYTE_CHAR_P (c)) 3037 if (SINGLE_BYTE_CHAR_P (c))
3038 { 3038 {
3039 switch (c) 3039 switch (c)
3040 { 3040 {
3355 not designated initially but should be designated to graphic 3355 not designated initially but should be designated to graphic
3356 register N just before encoding a character in that charset. 3356 register N just before encoding a character in that charset.
3357 3357
3358 If the value is nil, graphic register N is never used on 3358 If the value is nil, graphic register N is never used on
3359 encoding. 3359 encoding.
3360 3360
3361 sub-element[N] where N is 4 through 11: to be set in `coding->flags' 3361 sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3362 Each value takes t or nil. See the section ISO2022 of 3362 Each value takes t or nil. See the section ISO2022 of
3363 `coding.h' for more information. 3363 `coding.h' for more information.
3364 3364
3365 If `coding->type' is `coding_type_big5', element[4] is t to denote 3365 If `coding->type' is `coding_type_big5', element[4] is t to denote
3479 else 3479 else
3480 goto label_invalid_coding_system; 3480 goto label_invalid_coding_system;
3481 } 3481 }
3482 else 3482 else
3483 goto label_invalid_coding_system; 3483 goto label_invalid_coding_system;
3484 3484
3485 /* If the coding system has non-nil `composition' property, enable 3485 /* If the coding system has non-nil `composition' property, enable
3486 composition handling. */ 3486 composition handling. */
3487 val = Fplist_get (plist, Qcomposition); 3487 val = Fplist_get (plist, Qcomposition);
3488 if (!NILP (val)) 3488 if (!NILP (val))
3489 coding->composing = COMPOSITION_NO; 3489 coding->composing = COMPOSITION_NO;
3619 tail = XCDR (tail); 3619 tail = XCDR (tail);
3620 } 3620 }
3621 } 3621 }
3622 else 3622 else
3623 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; 3623 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3624 3624
3625 CODING_SPEC_ISO_DESIGNATION (coding, i) 3625 CODING_SPEC_ISO_DESIGNATION (coding, i)
3626 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i); 3626 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3627 } 3627 }
3628 3628
3629 if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)) 3629 if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3992 | CODING_CATEGORY_MASK_EMACS_MULE 3992 | CODING_CATEGORY_MASK_EMACS_MULE
3993 | CODING_CATEGORY_MASK_UTF_16_BE 3993 | CODING_CATEGORY_MASK_UTF_16_BE
3994 | CODING_CATEGORY_MASK_UTF_16_LE); 3994 | CODING_CATEGORY_MASK_UTF_16_LE);
3995 3995
3996 /* Or, if C is a special latin extra code, 3996 /* Or, if C is a special latin extra code,
3997 or is an ISO2022 specific control code of C1 (SS2 or SS3), 3997 or is an ISO2022 specific control code of C1 (SS2 or SS3),
3998 or is an ISO2022 control-sequence-introducer (CSI), 3998 or is an ISO2022 control-sequence-introducer (CSI),
3999 we should also consider the possibility of ISO2022 codings. */ 3999 we should also consider the possibility of ISO2022 codings. */
4000 if ((VECTORP (Vlatin_extra_code_table) 4000 if ((VECTORP (Vlatin_extra_code_table)
4001 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) 4001 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4002 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) 4002 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
5099 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) 5099 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5100 /* We can't skip any data. */ 5100 /* We can't skip any data. */
5101 break; 5101 break;
5102 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL) 5102 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5103 { 5103 {
5104 unsigned char *bol = begp; 5104 unsigned char *bol = begp;
5105 while (begp < endp && *begp < 0x80) 5105 while (begp < endp && *begp < 0x80)
5106 { 5106 {
5107 begp++; 5107 begp++;
5108 if (begp[-1] == '\n') 5108 if (begp[-1] == '\n')
5109 bol = begp; 5109 bol = begp;
5597 && XVECTOR (eol_type)->size == 3 5597 && XVECTOR (eol_type)->size == 3
5598 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF])) 5598 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5599 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF]; 5599 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5600 else 5600 else
5601 coding->symbol = saved_coding_symbol; 5601 coding->symbol = saved_coding_symbol;
5602 5602
5603 continue; 5603 continue;
5604 } 5604 }
5605 if (len_byte <= 0) 5605 if (len_byte <= 0)
5606 { 5606 {
5607 if (coding->type != coding_type_ccl 5607 if (coding->type != coding_type_ccl
5615 /* The source text ends in invalid codes. Let's just 5615 /* The source text ends in invalid codes. Let's just
5616 make them valid buffer contents, and finish conversion. */ 5616 make them valid buffer contents, and finish conversion. */
5617 if (multibyte_p) 5617 if (multibyte_p)
5618 { 5618 {
5619 unsigned char *start = dst; 5619 unsigned char *start = dst;
5620 5620
5621 inserted += len_byte; 5621 inserted += len_byte;
5622 while (len_byte--) 5622 while (len_byte--)
5623 { 5623 {
5624 int c = *src++; 5624 int c = *src++;
5625 dst += CHAR_STRING (c, dst); 5625 dst += CHAR_STRING (c, dst);
5698 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; 5698 GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5699 } 5699 }
5700 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte); 5700 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5701 } 5701 }
5702 5702
5703 /* If we shrank the conversion area, adjust it now. */ 5703 /* If we shrank the conversion area, adjust it now. */
5704 if (total_skip > 0) 5704 if (total_skip > 0)
5705 { 5705 {
5706 if (tail_skip > 0) 5706 if (tail_skip > 0)
5707 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip); 5707 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5708 inserted += total_skip; inserted_byte += total_skip; 5708 inserted += total_skip; inserted_byte += total_skip;
5940 *--p1 = *--p0; 5940 *--p1 = *--p0;
5941 if (*p0 == '\n') *--p1 = '\r'; 5941 if (*p0 == '\n') *--p1 = '\r';
5942 } 5942 }
5943 produced += num_eol; 5943 produced += num_eol;
5944 produced_char += num_eol; 5944 produced_char += num_eol;
5945 } 5945 }
5946 /* Suppress eol-format conversion in the further conversion. */ 5946 /* Suppress eol-format conversion in the further conversion. */
5947 coding->eol_type = CODING_EOL_LF; 5947 coding->eol_type = CODING_EOL_LF;
5948 5948
5949 /* Set the coding system symbol to that for Unix-like EOL. */ 5949 /* Set the coding system symbol to that for Unix-like EOL. */
5950 eol_type = Fget (saved_coding_symbol, Qeol_type); 5950 eol_type = Fget (saved_coding_symbol, Qeol_type);
6210 if (VECTORP (eol)) 6210 if (VECTORP (eol))
6211 XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]); 6211 XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6212 } 6212 }
6213 } 6213 }
6214 return (highest ? XCAR (val) : val); 6214 return (highest ? XCAR (val) : val);
6215 } 6215 }
6216 6216
6217 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, 6217 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6218 2, 3, 0, 6218 2, 3, 0,
6219 doc: /* Detect coding system of the text in the region between START and END. 6219 doc: /* Detect coding system of the text in the region between START and END.
6220 Return a list of possible coding systems ordered by priority. 6220 Return a list of possible coding systems ordered by priority.
6406 Check if it contains eight-bit-graphic. */ 6406 Check if it contains eight-bit-graphic. */
6407 p = p1; 6407 p = p1;
6408 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++); 6408 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6409 if (p == p1end) 6409 if (p == p1end)
6410 { 6410 {
6411 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++); 6411 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6412 if (p == p2end) 6412 if (p == p2end)
6413 return Qt; 6413 return Qt;
6414 } 6414 }
6415 } 6415 }
6416 6416
6938 void 6938 void
6939 init_coding_once () 6939 init_coding_once ()
6940 { 6940 {
6941 int i; 6941 int i;
6942 6942
6943 /* Emacs' internal format specific initialize routine. */ 6943 /* Emacs' internal format specific initialize routine. */
6944 for (i = 0; i <= 0x20; i++) 6944 for (i = 0; i <= 0x20; i++)
6945 emacs_code_class[i] = EMACS_control_code; 6945 emacs_code_class[i] = EMACS_control_code;
6946 emacs_code_class[0x0A] = EMACS_linefeed_code; 6946 emacs_code_class[0x0A] = EMACS_linefeed_code;
6947 emacs_code_class[0x0D] = EMACS_carriage_return_code; 6947 emacs_code_class[0x0D] = EMACS_carriage_return_code;
6948 for (i = 0x21 ; i < 0x7F; i++) 6948 for (i = 0x21 ; i < 0x7F; i++)