Mercurial > emacs
comparison src/coding.c @ 29725:2bc397e9b09a
(setup_coding_system) <4>: Reset member `cr_carryover'.
(ccl_coding_driver): On encoding, initialize ccl->eol_type.
(decode_eol_post_ccl): New function.
(decode_coding): Don't detect EOL format here for CCL based coding
systems.
(decode_coding) <coding_type_ccl>: Handle a carried-over CR. Call
decode_eol_post_ccl after running the CCL program.
(code_convert_region): Don't detect EOL format here for CCL based
coding systems.
(decode_coding_string): Likewise.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 19 Jun 2000 05:18:09 +0000 |
| parents | ebf778ab6b42 |
| children | 7b43e1fb478a |
comparison
equal
deleted
inserted
replaced
| 29724:caf7f927357c | 29725:2bc397e9b09a |
|---|---|
| 3200 } | 3200 } |
| 3201 } | 3201 } |
| 3202 } | 3202 } |
| 3203 } | 3203 } |
| 3204 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; | 3204 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; |
| 3205 coding->spec.ccl.cr_carryover = 0; | |
| 3205 break; | 3206 break; |
| 3206 | 3207 |
| 3207 case 5: | 3208 case 5: |
| 3208 coding->type = coding_type_raw_text; | 3209 coding->type = coding_type_raw_text; |
| 3209 break; | 3210 break; |
| 3881 struct ccl_program *ccl | 3882 struct ccl_program *ccl |
| 3882 = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder; | 3883 = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder; |
| 3883 int result; | 3884 int result; |
| 3884 | 3885 |
| 3885 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; | 3886 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; |
| 3886 | 3887 if (encodep) |
| 3888 ccl->eol_type = coding->eol_type; | |
| 3887 coding->produced = ccl_driver (ccl, source, destination, | 3889 coding->produced = ccl_driver (ccl, source, destination, |
| 3888 src_bytes, dst_bytes, &(coding->consumed)); | 3890 src_bytes, dst_bytes, &(coding->consumed)); |
| 3889 if (encodep) | 3891 if (encodep) |
| 3890 coding->produced_char = coding->produced; | 3892 coding->produced_char = coding->produced; |
| 3891 else | 3893 else |
| 3914 break; | 3916 break; |
| 3915 } | 3917 } |
| 3916 return result; | 3918 return result; |
| 3917 } | 3919 } |
| 3918 | 3920 |
| 3921 /* Decode the EOL format of the BYTES bytes of text at PTR destructively (in place) | |
| 3922 according to CODING->eol_type. This is called after the CCL | |
| 3923 program has produced decoded text at PTR. If CODING->eol_type is still undecided, it is first set from detect_eol_type. If we do CRLF->LF | |
| 3924 conversion, update CODING->produced and CODING->produced_char. In CRLF mode, a trailing CR of a non-final block is excluded from those counts and flagged in CODING->spec.ccl.cr_carryover. */ | |
| 3925 | |
| 3926 static void | |
| 3927 decode_eol_post_ccl (coding, ptr, bytes) | |
| 3928 struct coding_system *coding; | |
| 3929 unsigned char *ptr; | |
| 3930 int bytes; | |
| 3931 { | |
| 3932 Lisp_Object val, saved_coding_symbol; | |
| 3933 unsigned char *pend = ptr + bytes; | |
| 3934 int dummy; | |
| 3935 | |
| 3936 /* Remember the current coding system symbol. We set it back when | |
| 3937 an inconsistent EOL is found so that `last-coding-system-used' is | |
| 3938 set to the coding system that doesn't specify EOL conversion. */ | |
| 3939 saved_coding_symbol = coding->symbol; | |
| 3940 | |
| 3941 coding->spec.ccl.cr_carryover = 0; | |
| 3942 if (coding->eol_type == CODING_EOL_UNDECIDED) | |
| 3943 { | |
| 3944 /* Here, to avoid calling setup_coding_system, we call | |
| 3945 detect_eol_type directly. */ | |
| 3946 coding->eol_type = detect_eol_type (ptr, bytes, &dummy); | |
| 3947 val = Fget (coding->symbol, Qeol_type); | |
| 3948 if (VECTORP (val) && XVECTOR (val)->size == 3) | |
| 3949 coding->symbol = XVECTOR (val)->contents[coding->eol_type]; /* NOTE(review): indexes a 3-element vector with the detected type -- confirm detect_eol_type can only return 0..2 here. */ | |
| 3950 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; | |
| 3951 } | |
| 3952 | |
| 3953 if (coding->eol_type == CODING_EOL_LF) | |
| 3954 { | |
| 3955 /* We have nothing to do. */ | |
| 3956 ptr = pend; | |
| 3957 } | |
| 3958 else if (coding->eol_type == CODING_EOL_CRLF) | |
| 3959 { | |
| 3960 unsigned char *pstart = ptr, *p = ptr; | |
| 3961 | |
| 3962 if (! (coding->mode & CODING_MODE_LAST_BLOCK) | |
| 3963 && *(pend - 1) == '\r') /* NOTE(review): reads PTR[-1] when BYTES is 0 -- confirm callers never pass empty text. */ | |
| 3964 { | |
| 3965 /* If the last character is CR, we can't handle it here | |
| 3966 because a following LF would be in the not-yet-decoded source text. | |
| 3967 Record that the CR is not yet processed. */ | |
| 3968 coding->spec.ccl.cr_carryover = 1; | |
| 3969 coding->produced--; | |
| 3970 coding->produced_char--; | |
| 3971 pend--; | |
| 3972 } | |
| 3973 while (ptr < pend) | |
| 3974 { | |
| 3975 if (*ptr == '\r') | |
| 3976 { | |
| 3977 if (ptr + 1 < pend && *(ptr + 1) == '\n') | |
| 3978 { | |
| 3979 *p++ = '\n'; | |
| 3980 ptr += 2; | |
| 3981 } | |
| 3982 else | |
| 3983 { | |
| 3984 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | |
| 3985 goto undo_eol_conversion; | |
| 3986 *p++ = *ptr++; | |
| 3987 } | |
| 3988 } | |
| 3989 else if (*ptr == '\n' | |
| 3990 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | |
| 3991 goto undo_eol_conversion; | |
| 3992 else | |
| 3993 *p++ = *ptr++; | |
| 3994 continue; | |
| 3995 | |
| 3996 undo_eol_conversion: | |
| 3997 /* We have encountered an inconsistent EOL format at PTR. | |
| 3998 Convert all LFs before PTR back to CRLFs. */ | |
| 3999 for (p--, ptr--; p >= pstart; p--) | |
| 4000 { | |
| 4001 if (*p == '\n') | |
| 4002 *ptr-- = '\n', *ptr-- = '\r'; | |
| 4003 else | |
| 4004 *ptr-- = *p; | |
| 4005 } | |
| 4006 /* If a carryover is recorded, cancel it because we don't | |
| 4007 convert CRLF anymore. */ | |
| 4008 if (coding->spec.ccl.cr_carryover) | |
| 4009 { | |
| 4010 coding->spec.ccl.cr_carryover = 0; | |
| 4011 coding->produced++; | |
| 4012 coding->produced_char++; | |
| 4013 pend++; | |
| 4014 } | |
| 4015 p = ptr = pend; | |
| 4016 coding->eol_type = CODING_EOL_LF; | |
| 4017 coding->symbol = saved_coding_symbol; | |
| 4018 } | |
| 4019 if (p < pend) | |
| 4020 { | |
| 4021 /* As each two-byte sequence CRLF was converted to LF, (PEND | |
| 4022 - P) is the number of deleted characters. */ | |
| 4023 coding->produced -= pend - p; | |
| 4024 coding->produced_char -= pend - p; | |
| 4025 } | |
| 4026 } | |
| 4027 else /* i.e. coding->eol_type == CODING_EOL_CR */ | |
| 4028 { | |
| 4029 unsigned char *p = ptr; | |
| 4030 | |
| 4031 for (; ptr < pend; ptr++) | |
| 4032 { | |
| 4033 if (*ptr == '\r') | |
| 4034 *ptr = '\n'; | |
| 4035 else if (*ptr == '\n' | |
| 4036 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | |
| 4037 { /* Inconsistent EOL: turn the LFs before PTR back into CRs, stop scanning, and fall back to LF with the saved symbol. */ | |
| 4038 for (; p < ptr; p++) | |
| 4039 { | |
| 4040 if (*p == '\n') | |
| 4041 *p = '\r'; | |
| 4042 } | |
| 4043 ptr = pend; | |
| 4044 coding->eol_type = CODING_EOL_LF; | |
| 4045 coding->symbol = saved_coding_symbol; | |
| 4046 } | |
| 4047 } | |
| 4048 } | |
| 4049 } | |
| 4050 | |
| 3919 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before | 4051 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before |
| 3920 decoding, it may detect coding system and format of end-of-line if | 4052 decoding, it may detect coding system and format of end-of-line if |
| 3921 those are not yet decided. The source should be unibyte, the | 4053 those are not yet decided. The source should be unibyte, the |
| 3922 result is multibyte if CODING->dst_multibyte is nonzero, else | 4054 result is multibyte if CODING->dst_multibyte is nonzero, else |
| 3923 unibyte. */ | 4055 unibyte. */ |
| 3929 int src_bytes, dst_bytes; | 4061 int src_bytes, dst_bytes; |
| 3930 { | 4062 { |
| 3931 if (coding->type == coding_type_undecided) | 4063 if (coding->type == coding_type_undecided) |
| 3932 detect_coding (coding, source, src_bytes); | 4064 detect_coding (coding, source, src_bytes); |
| 3933 | 4065 |
| 3934 if (coding->eol_type == CODING_EOL_UNDECIDED) | 4066 if (coding->eol_type == CODING_EOL_UNDECIDED |
| 4067 && coding->type != coding_type_ccl) | |
| 3935 detect_eol (coding, source, src_bytes); | 4068 detect_eol (coding, source, src_bytes); |
| 3936 | 4069 |
| 3937 coding->produced = coding->produced_char = 0; | 4070 coding->produced = coding->produced_char = 0; |
| 3938 coding->consumed = coding->consumed_char = 0; | 4071 coding->consumed = coding->consumed_char = 0; |
| 3939 coding->errors = 0; | 4072 coding->errors = 0; |
| 3960 decode_coding_emacs_mule (coding, source, destination, | 4093 decode_coding_emacs_mule (coding, source, destination, |
| 3961 src_bytes, dst_bytes); | 4094 src_bytes, dst_bytes); |
| 3962 break; | 4095 break; |
| 3963 | 4096 |
| 3964 case coding_type_ccl: | 4097 case coding_type_ccl: |
| 3965 ccl_coding_driver (coding, source, destination, | 4098 if (coding->spec.ccl.cr_carryover) |
| 4099 { | |
| 4100 /* Store the CR that the previous call of decode_eol_post_ccl | |
| 4101 left unprocessed at the start of DESTINATION. */ | |
| 4102 *destination = '\r'; | |
| 4103 coding->produced++; | |
| 4104 coding->produced_char++; | |
| 4105 dst_bytes--; | |
| 4106 } | |
| 4107 ccl_coding_driver (coding, source, | |
| 4108 destination + coding->spec.ccl.cr_carryover, | |
| 3966 src_bytes, dst_bytes, 0); | 4109 src_bytes, dst_bytes, 0); |
| 4110 if (coding->eol_type != CODING_EOL_LF) | |
| 4111 decode_eol_post_ccl (coding, destination, coding->produced); | |
| 3967 break; | 4112 break; |
| 3968 | 4113 |
| 3969 default: | 4114 default: |
| 3970 decode_eol (coding, source, destination, src_bytes, dst_bytes); | 4115 decode_eol (coding, source, destination, src_bytes, dst_bytes); |
| 3971 } | 4116 } |
| 4578 should not leave it undecided because the deeper | 4723 should not leave it undecided because the deeper |
| 4579 decoding routine (decode_coding) tries to detect the | 4724 decoding routine (decode_coding) tries to detect the |
| 4580 encodings again in vain. */ | 4725 encodings again in vain. */ |
| 4581 coding->type = coding_type_emacs_mule; | 4726 coding->type = coding_type_emacs_mule; |
| 4582 } | 4727 } |
| 4583 if (coding->eol_type == CODING_EOL_UNDECIDED) | 4728 if (coding->eol_type == CODING_EOL_UNDECIDED |
| 4729 && coding->type != coding_type_ccl) | |
| 4584 { | 4730 { |
| 4585 saved_coding_symbol = coding->symbol; | 4731 saved_coding_symbol = coding->symbol; |
| 4586 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); | 4732 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); |
| 4587 if (coding->eol_type == CODING_EOL_UNDECIDED) | 4733 if (coding->eol_type == CODING_EOL_UNDECIDED) |
| 4588 coding->eol_type = CODING_EOL_LF; | 4734 coding->eol_type = CODING_EOL_LF; |
| 5036 { | 5182 { |
| 5037 detect_coding (coding, XSTRING (str)->data, to_byte); | 5183 detect_coding (coding, XSTRING (str)->data, to_byte); |
| 5038 if (coding->type == coding_type_undecided) | 5184 if (coding->type == coding_type_undecided) |
| 5039 coding->type = coding_type_emacs_mule; | 5185 coding->type = coding_type_emacs_mule; |
| 5040 } | 5186 } |
| 5041 if (coding->eol_type == CODING_EOL_UNDECIDED) | 5187 if (coding->eol_type == CODING_EOL_UNDECIDED |
| 5188 && coding->type != coding_type_ccl) | |
| 5042 { | 5189 { |
| 5043 saved_coding_symbol = coding->symbol; | 5190 saved_coding_symbol = coding->symbol; |
| 5044 detect_eol (coding, XSTRING (str)->data, to_byte); | 5191 detect_eol (coding, XSTRING (str)->data, to_byte); |
| 5045 if (coding->eol_type == CODING_EOL_UNDECIDED) | 5192 if (coding->eol_type == CODING_EOL_UNDECIDED) |
| 5046 coding->eol_type = CODING_EOL_LF; | 5193 coding->eol_type = CODING_EOL_LF; |
