Mercurial > emacs
comparison src/coding.c @ 42104:d69c2368e549
(DECODE_COMPOSITION_END): Fixed a typo in the last
patch (COMPOSING_P, not COMPOSING).
| author | Sam Steingold <sds@gnu.org> |
|---|---|
| date | Mon, 17 Dec 2001 15:21:06 +0000 |
| parents | e5356ff675e3 |
| children | 09cc243e2d14 |
comparison
equal
deleted
inserted
replaced
| 42103:e5356ff675e3 | 42104:d69c2368e549 |
|---|---|
| 58 Compound Text, various EUCs (Extended Unix Code), and coding | 58 Compound Text, various EUCs (Extended Unix Code), and coding |
| 59 systems used in Internet communication such as ISO-2022-JP are | 59 systems used in Internet communication such as ISO-2022-JP are |
| 60 all variants of ISO2022. Details are described in section 3. | 60 all variants of ISO2022. Details are described in section 3. |
| 61 | 61 |
| 62 2. SJIS (or Shift-JIS or MS-Kanji-Code) | 62 2. SJIS (or Shift-JIS or MS-Kanji-Code) |
| 63 | 63 |
| 64 A coding system to encode character sets: ASCII, JISX0201, and | 64 A coding system to encode character sets: ASCII, JISX0201, and |
| 65 JISX0208. Widely used for PC's in Japan. Details are described in | 65 JISX0208. Widely used for PC's in Japan. Details are described in |
| 66 section 4. | 66 section 4. |
| 67 | 67 |
| 68 3. BIG5 | 68 3. BIG5 |
| 501 Lisp_Object | 501 Lisp_Object |
| 502 coding_safe_chars (coding) | 502 coding_safe_chars (coding) |
| 503 struct coding_system *coding; | 503 struct coding_system *coding; |
| 504 { | 504 { |
| 505 Lisp_Object coding_spec, plist, safe_chars; | 505 Lisp_Object coding_spec, plist, safe_chars; |
| 506 | 506 |
| 507 coding_spec = Fget (coding->symbol, Qcoding_system); | 507 coding_spec = Fget (coding->symbol, Qcoding_system); |
| 508 plist = XVECTOR (coding_spec)->contents[3]; | 508 plist = XVECTOR (coding_spec)->contents[3]; |
| 509 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); | 509 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); |
| 510 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); | 510 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); |
| 511 } | 511 } |
| 887 if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) | 887 if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) |
| 888 { | 888 { |
| 889 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method); | 889 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method); |
| 890 for (i = 0; i < ncomponent; i++) | 890 for (i = 0; i < ncomponent; i++) |
| 891 CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]); | 891 CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]); |
| 892 CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars); | 892 CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars); |
| 893 if (buf < bufp) | 893 if (buf < bufp) |
| 894 { | 894 { |
| 895 unsigned char *p = buf; | 895 unsigned char *p = buf; |
| 896 EMIT_BYTES (p, bufp); | 896 EMIT_BYTES (p, bufp); |
| 897 *destination += bufp - buf; | 897 *destination += bufp - buf; |
| 1053 { \ | 1053 { \ |
| 1054 coding->cmp_data = coding->cmp_data->next; \ | 1054 coding->cmp_data = coding->cmp_data->next; \ |
| 1055 coding->cmp_data_start = 0; \ | 1055 coding->cmp_data_start = 0; \ |
| 1056 } \ | 1056 } \ |
| 1057 } while (0) | 1057 } while (0) |
| 1058 | 1058 |
| 1059 | 1059 |
| 1060 static void encode_eol P_ ((struct coding_system *, unsigned char *, | 1060 static void encode_eol P_ ((struct coding_system *, unsigned char *, |
| 1061 unsigned char *, int, int)); | 1061 unsigned char *, int, int)); |
| 1062 | 1062 |
| 1063 static void | 1063 static void |
| 1431 /* Locking shift out. */ | 1431 /* Locking shift out. */ |
| 1432 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; | 1432 mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; |
| 1433 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; | 1433 mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; |
| 1434 } | 1434 } |
| 1435 break; | 1435 break; |
| 1436 | 1436 |
| 1437 case ISO_CODE_SI: | 1437 case ISO_CODE_SI: |
| 1438 if (inhibit_iso_escape_detection) | 1438 if (inhibit_iso_escape_detection) |
| 1439 break; | 1439 break; |
| 1440 single_shifting = 0; | 1440 single_shifting = 0; |
| 1441 if (shift_out == 1) | 1441 if (shift_out == 1) |
| 1663 | 1663 |
| 1664 /* Handle composition end sequence ESC 1. */ | 1664 /* Handle composition end sequence ESC 1. */ |
| 1665 | 1665 |
| 1666 #define DECODE_COMPOSITION_END(c1) \ | 1666 #define DECODE_COMPOSITION_END(c1) \ |
| 1667 do { \ | 1667 do { \ |
| 1668 if (! COMPOSING (coding)) \ | 1668 if (! COMPOSING_P (coding)) \ |
| 1669 { \ | 1669 { \ |
| 1670 *dst++ = ISO_CODE_ESC; \ | 1670 *dst++ = ISO_CODE_ESC; \ |
| 1671 *dst++ = c1; \ | 1671 *dst++ = c1; \ |
| 1672 coding->produced_char += 2; \ | 1672 coding->produced_char += 2; \ |
| 1673 } \ | 1673 } \ |
| 2437 while (found < 4) | 2437 while (found < 4) |
| 2438 { | 2438 { |
| 2439 ONE_MORE_CHAR (c); | 2439 ONE_MORE_CHAR (c); |
| 2440 if (c == '\n') | 2440 if (c == '\n') |
| 2441 break; | 2441 break; |
| 2442 | 2442 |
| 2443 charset = CHAR_CHARSET (c); | 2443 charset = CHAR_CHARSET (c); |
| 2444 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); | 2444 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); |
| 2445 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) | 2445 if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0) |
| 2446 { | 2446 { |
| 2447 found++; | 2447 found++; |
| 2607 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; | 2607 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; |
| 2608 else | 2608 else |
| 2609 *dst++ = ISO_CODE_CR; | 2609 *dst++ = ISO_CODE_CR; |
| 2610 CODING_SPEC_ISO_BOL (coding) = 1; | 2610 CODING_SPEC_ISO_BOL (coding) = 1; |
| 2611 } | 2611 } |
| 2612 else | 2612 else |
| 2613 { | 2613 { |
| 2614 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | 2614 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 2615 ENCODE_RESET_PLANE_AND_REGISTER; | 2615 ENCODE_RESET_PLANE_AND_REGISTER; |
| 2616 *dst++ = c; | 2616 *dst++ = c; |
| 2617 } | 2617 } |
| 3030 { | 3030 { |
| 3031 int c, charset, c1, c2; | 3031 int c, charset, c1, c2; |
| 3032 | 3032 |
| 3033 src_base = src; | 3033 src_base = src; |
| 3034 ONE_MORE_CHAR (c); | 3034 ONE_MORE_CHAR (c); |
| 3035 | 3035 |
| 3036 /* Now encode the character C. */ | 3036 /* Now encode the character C. */ |
| 3037 if (SINGLE_BYTE_CHAR_P (c)) | 3037 if (SINGLE_BYTE_CHAR_P (c)) |
| 3038 { | 3038 { |
| 3039 switch (c) | 3039 switch (c) |
| 3040 { | 3040 { |
| 3355 not designated initially but should be designated to graphic | 3355 not designated initially but should be designated to graphic |
| 3356 register N just before encoding a character in that charset. | 3356 register N just before encoding a character in that charset. |
| 3357 | 3357 |
| 3358 If the value is nil, graphic register N is never used on | 3358 If the value is nil, graphic register N is never used on |
| 3359 encoding. | 3359 encoding. |
| 3360 | 3360 |
| 3361 sub-element[N] where N is 4 through 11: to be set in `coding->flags' | 3361 sub-element[N] where N is 4 through 11: to be set in `coding->flags' |
| 3362 Each value takes t or nil. See the section ISO2022 of | 3362 Each value takes t or nil. See the section ISO2022 of |
| 3363 `coding.h' for more information. | 3363 `coding.h' for more information. |
| 3364 | 3364 |
| 3365 If `coding->type' is `coding_type_big5', element[4] is t to denote | 3365 If `coding->type' is `coding_type_big5', element[4] is t to denote |
| 3479 else | 3479 else |
| 3480 goto label_invalid_coding_system; | 3480 goto label_invalid_coding_system; |
| 3481 } | 3481 } |
| 3482 else | 3482 else |
| 3483 goto label_invalid_coding_system; | 3483 goto label_invalid_coding_system; |
| 3484 | 3484 |
| 3485 /* If the coding system has non-nil `composition' property, enable | 3485 /* If the coding system has non-nil `composition' property, enable |
| 3486 composition handling. */ | 3486 composition handling. */ |
| 3487 val = Fplist_get (plist, Qcomposition); | 3487 val = Fplist_get (plist, Qcomposition); |
| 3488 if (!NILP (val)) | 3488 if (!NILP (val)) |
| 3489 coding->composing = COMPOSITION_NO; | 3489 coding->composing = COMPOSITION_NO; |
| 3619 tail = XCDR (tail); | 3619 tail = XCDR (tail); |
| 3620 } | 3620 } |
| 3621 } | 3621 } |
| 3622 else | 3622 else |
| 3623 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; | 3623 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; |
| 3624 | 3624 |
| 3625 CODING_SPEC_ISO_DESIGNATION (coding, i) | 3625 CODING_SPEC_ISO_DESIGNATION (coding, i) |
| 3626 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i); | 3626 = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i); |
| 3627 } | 3627 } |
| 3628 | 3628 |
| 3629 if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)) | 3629 if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)) |
| 3992 | CODING_CATEGORY_MASK_EMACS_MULE | 3992 | CODING_CATEGORY_MASK_EMACS_MULE |
| 3993 | CODING_CATEGORY_MASK_UTF_16_BE | 3993 | CODING_CATEGORY_MASK_UTF_16_BE |
| 3994 | CODING_CATEGORY_MASK_UTF_16_LE); | 3994 | CODING_CATEGORY_MASK_UTF_16_LE); |
| 3995 | 3995 |
| 3996 /* Or, if C is a special latin extra code, | 3996 /* Or, if C is a special latin extra code, |
| 3997 or is an ISO2022 specific control code of C1 (SS2 or SS3), | 3997 or is an ISO2022 specific control code of C1 (SS2 or SS3), |
| 3998 or is an ISO2022 control-sequence-introducer (CSI), | 3998 or is an ISO2022 control-sequence-introducer (CSI), |
| 3999 we should also consider the possibility of ISO2022 codings. */ | 3999 we should also consider the possibility of ISO2022 codings. */ |
| 4000 if ((VECTORP (Vlatin_extra_code_table) | 4000 if ((VECTORP (Vlatin_extra_code_table) |
| 4001 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | 4001 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) |
| 4002 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) | 4002 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) |
| 5099 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) | 5099 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) |
| 5100 /* We can't skip any data. */ | 5100 /* We can't skip any data. */ |
| 5101 break; | 5101 break; |
| 5102 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL) | 5102 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL) |
| 5103 { | 5103 { |
| 5104 unsigned char *bol = begp; | 5104 unsigned char *bol = begp; |
| 5105 while (begp < endp && *begp < 0x80) | 5105 while (begp < endp && *begp < 0x80) |
| 5106 { | 5106 { |
| 5107 begp++; | 5107 begp++; |
| 5108 if (begp[-1] == '\n') | 5108 if (begp[-1] == '\n') |
| 5109 bol = begp; | 5109 bol = begp; |
| 5597 && XVECTOR (eol_type)->size == 3 | 5597 && XVECTOR (eol_type)->size == 3 |
| 5598 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF])) | 5598 && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF])) |
| 5599 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF]; | 5599 coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF]; |
| 5600 else | 5600 else |
| 5601 coding->symbol = saved_coding_symbol; | 5601 coding->symbol = saved_coding_symbol; |
| 5602 | 5602 |
| 5603 continue; | 5603 continue; |
| 5604 } | 5604 } |
| 5605 if (len_byte <= 0) | 5605 if (len_byte <= 0) |
| 5606 { | 5606 { |
| 5607 if (coding->type != coding_type_ccl | 5607 if (coding->type != coding_type_ccl |
| 5615 /* The source text ends in invalid codes. Let's just | 5615 /* The source text ends in invalid codes. Let's just |
| 5616 make them valid buffer contents, and finish conversion. */ | 5616 make them valid buffer contents, and finish conversion. */ |
| 5617 if (multibyte_p) | 5617 if (multibyte_p) |
| 5618 { | 5618 { |
| 5619 unsigned char *start = dst; | 5619 unsigned char *start = dst; |
| 5620 | 5620 |
| 5621 inserted += len_byte; | 5621 inserted += len_byte; |
| 5622 while (len_byte--) | 5622 while (len_byte--) |
| 5623 { | 5623 { |
| 5624 int c = *src++; | 5624 int c = *src++; |
| 5625 dst += CHAR_STRING (c, dst); | 5625 dst += CHAR_STRING (c, dst); |
| 5698 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | 5698 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; |
| 5699 } | 5699 } |
| 5700 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte); | 5700 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte); |
| 5701 } | 5701 } |
| 5702 | 5702 |
| 5703 /* If we shrank the conversion area, adjust it now. */ | 5703 /* If we shrank the conversion area, adjust it now. */ |
| 5704 if (total_skip > 0) | 5704 if (total_skip > 0) |
| 5705 { | 5705 { |
| 5706 if (tail_skip > 0) | 5706 if (tail_skip > 0) |
| 5707 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip); | 5707 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip); |
| 5708 inserted += total_skip; inserted_byte += total_skip; | 5708 inserted += total_skip; inserted_byte += total_skip; |
| 5940 *--p1 = *--p0; | 5940 *--p1 = *--p0; |
| 5941 if (*p0 == '\n') *--p1 = '\r'; | 5941 if (*p0 == '\n') *--p1 = '\r'; |
| 5942 } | 5942 } |
| 5943 produced += num_eol; | 5943 produced += num_eol; |
| 5944 produced_char += num_eol; | 5944 produced_char += num_eol; |
| 5945 } | 5945 } |
| 5946 /* Suppress eol-format conversion in the further conversion. */ | 5946 /* Suppress eol-format conversion in the further conversion. */ |
| 5947 coding->eol_type = CODING_EOL_LF; | 5947 coding->eol_type = CODING_EOL_LF; |
| 5948 | 5948 |
| 5949 /* Set the coding system symbol to that for Unix-like EOL. */ | 5949 /* Set the coding system symbol to that for Unix-like EOL. */ |
| 5950 eol_type = Fget (saved_coding_symbol, Qeol_type); | 5950 eol_type = Fget (saved_coding_symbol, Qeol_type); |
| 6210 if (VECTORP (eol)) | 6210 if (VECTORP (eol)) |
| 6211 XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]); | 6211 XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]); |
| 6212 } | 6212 } |
| 6213 } | 6213 } |
| 6214 return (highest ? XCAR (val) : val); | 6214 return (highest ? XCAR (val) : val); |
| 6215 } | 6215 } |
| 6216 | 6216 |
| 6217 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, | 6217 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, |
| 6218 2, 3, 0, | 6218 2, 3, 0, |
| 6219 doc: /* Detect coding system of the text in the region between START and END. | 6219 doc: /* Detect coding system of the text in the region between START and END. |
| 6220 Return a list of possible coding systems ordered by priority. | 6220 Return a list of possible coding systems ordered by priority. |
| 6406 Check if it contains eight-bit-graphic. */ | 6406 Check if it contains eight-bit-graphic. */ |
| 6407 p = p1; | 6407 p = p1; |
| 6408 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++); | 6408 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++); |
| 6409 if (p == p1end) | 6409 if (p == p1end) |
| 6410 { | 6410 { |
| 6411 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++); | 6411 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++); |
| 6412 if (p == p2end) | 6412 if (p == p2end) |
| 6413 return Qt; | 6413 return Qt; |
| 6414 } | 6414 } |
| 6415 } | 6415 } |
| 6416 | 6416 |
| 6938 void | 6938 void |
| 6939 init_coding_once () | 6939 init_coding_once () |
| 6940 { | 6940 { |
| 6941 int i; | 6941 int i; |
| 6942 | 6942 |
| 6943 /* Emacs' internal format specific initialize routine. */ | 6943 /* Emacs' internal format specific initialize routine. */ |
| 6944 for (i = 0; i <= 0x20; i++) | 6944 for (i = 0; i <= 0x20; i++) |
| 6945 emacs_code_class[i] = EMACS_control_code; | 6945 emacs_code_class[i] = EMACS_control_code; |
| 6946 emacs_code_class[0x0A] = EMACS_linefeed_code; | 6946 emacs_code_class[0x0A] = EMACS_linefeed_code; |
| 6947 emacs_code_class[0x0D] = EMACS_carriage_return_code; | 6947 emacs_code_class[0x0D] = EMACS_carriage_return_code; |
| 6948 for (i = 0x21 ; i < 0x7F; i++) | 6948 for (i = 0x21 ; i < 0x7F; i++) |
