comparison src/coding.c @ 29725:2bc397e9b09a

(setup_coding_system) <4>: Reset member `cr_carryover'. (ccl_coding_driver): On encoding, initialize ccl->eol_type. (decode_eol_post_ccl): New function. (decode_coding): Don't detect EOL format here for CCL based coding systems. (decode_coding) <coding_type_ccl>: Handle carryovered CR. Call decode_eol_post_ccl after running the CCL program. (code_convert_region): Don't detect EOL format here for CCL based coding systems. (decode_coding_string): Likewise.
author Kenichi Handa <handa@m17n.org>
date Mon, 19 Jun 2000 05:18:09 +0000
parents ebf778ab6b42
children 7b43e1fb478a
comparison
equal deleted inserted replaced
29724:caf7f927357c 29725:2bc397e9b09a
3200 } 3200 }
3201 } 3201 }
3202 } 3202 }
3203 } 3203 }
3204 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; 3204 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3205 coding->spec.ccl.cr_carryover = 0;
3205 break; 3206 break;
3206 3207
3207 case 5: 3208 case 5:
3208 coding->type = coding_type_raw_text; 3209 coding->type = coding_type_raw_text;
3209 break; 3210 break;
3881 struct ccl_program *ccl 3882 struct ccl_program *ccl
3882 = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder; 3883 = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3883 int result; 3884 int result;
3884 3885
3885 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; 3886 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3886 3887 if (encodep)
3888 ccl->eol_type = coding->eol_type;
3887 coding->produced = ccl_driver (ccl, source, destination, 3889 coding->produced = ccl_driver (ccl, source, destination,
3888 src_bytes, dst_bytes, &(coding->consumed)); 3890 src_bytes, dst_bytes, &(coding->consumed));
3889 if (encodep) 3891 if (encodep)
3890 coding->produced_char = coding->produced; 3892 coding->produced_char = coding->produced;
3891 else 3893 else
3914 break; 3916 break;
3915 } 3917 }
3916 return result; 3918 return result;
3917 } 3919 }
3918 3920
3921 /* Decode EOL format of the text at PTR of BYTES length destructively
3922 according to CODING->eol_type. This is called after the CCL
3923 program produced a decoded text at PTR. If we do CRLF->LF
3924 conversion, update CODING->produced and CODING->produced_char. */
3925
3926 static void
3927 decode_eol_post_ccl (coding, ptr, bytes)
3928 struct coding_system *coding;
3929 unsigned char *ptr;
3930 int bytes;
3931 {
3932 Lisp_Object val, saved_coding_symbol;
3933 unsigned char *pend = ptr + bytes;
3934 int dummy;
3935
3936 /* Remember the current coding system symbol. We set it back when
3937 an inconsistent EOL is found so that `last-coding-system-used' is
3938 set to the coding system that doesn't specify EOL conversion. */
3939 saved_coding_symbol = coding->symbol;
3940
3941 coding->spec.ccl.cr_carryover = 0;
3942 if (coding->eol_type == CODING_EOL_UNDECIDED)
3943 {
3944 /* Here, to avoid the call of setup_coding_system, we directly
3945 call detect_eol_type. */
3946 coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
3947 val = Fget (coding->symbol, Qeol_type);
3948 if (VECTORP (val) && XVECTOR (val)->size == 3)
3949 coding->symbol = XVECTOR (val)->contents[coding->eol_type];
3950 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
3951 }
3952
3953 if (coding->eol_type == CODING_EOL_LF)
3954 {
3955 /* We have nothing to do. */
3956 ptr = pend;
3957 }
3958 else if (coding->eol_type == CODING_EOL_CRLF)
3959 {
3960 unsigned char *pstart = ptr, *p = ptr;
3961
3962 if (! (coding->mode & CODING_MODE_LAST_BLOCK)
3963 && *(pend - 1) == '\r')
3964 {
3965 /* If the last character is CR, we can't handle it here
3966 because LF will be in the not-yet-decoded source text.
3967 Recorded that the CR is not yet processed. */
3968 coding->spec.ccl.cr_carryover = 1;
3969 coding->produced--;
3970 coding->produced_char--;
3971 pend--;
3972 }
3973 while (ptr < pend)
3974 {
3975 if (*ptr == '\r')
3976 {
3977 if (ptr + 1 < pend && *(ptr + 1) == '\n')
3978 {
3979 *p++ = '\n';
3980 ptr += 2;
3981 }
3982 else
3983 {
3984 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3985 goto undo_eol_conversion;
3986 *p++ = *ptr++;
3987 }
3988 }
3989 else if (*ptr == '\n'
3990 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3991 goto undo_eol_conversion;
3992 else
3993 *p++ = *ptr++;
3994 continue;
3995
3996 undo_eol_conversion:
3997 /* We have faced with inconsistent EOL format at PTR.
3998 Convert all LFs before PTR back to CRLFs. */
3999 for (p--, ptr--; p >= pstart; p--)
4000 {
4001 if (*p == '\n')
4002 *ptr-- = '\n', *ptr-- = '\r';
4003 else
4004 *ptr-- = *p;
4005 }
4006 /* If carryover is recorded, cancel it because we don't
4007 convert CRLF anymore. */
4008 if (coding->spec.ccl.cr_carryover)
4009 {
4010 coding->spec.ccl.cr_carryover = 0;
4011 coding->produced++;
4012 coding->produced_char++;
4013 pend++;
4014 }
4015 p = ptr = pend;
4016 coding->eol_type = CODING_EOL_LF;
4017 coding->symbol = saved_coding_symbol;
4018 }
4019 if (p < pend)
4020 {
4021 /* As each two-byte sequence CRLF was converted to LF, (PEND
4022 - P) is the number of deleted characters. */
4023 coding->produced -= pend - p;
4024 coding->produced_char -= pend - p;
4025 }
4026 }
4027 else /* i.e. coding->eol_type == CODING_EOL_CR */
4028 {
4029 unsigned char *p = ptr;
4030
4031 for (; ptr < pend; ptr++)
4032 {
4033 if (*ptr == '\r')
4034 *ptr = '\n';
4035 else if (*ptr == '\n'
4036 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4037 {
4038 for (; p < ptr; p++)
4039 {
4040 if (*p == '\n')
4041 *p = '\r';
4042 }
4043 ptr = pend;
4044 coding->eol_type = CODING_EOL_LF;
4045 coding->symbol = saved_coding_symbol;
4046 }
4047 }
4048 }
4049 }
4050
3919 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before 4051 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
3920 decoding, it may detect coding system and format of end-of-line if 4052 decoding, it may detect coding system and format of end-of-line if
3921 those are not yet decided. The source should be unibyte, the 4053 those are not yet decided. The source should be unibyte, the
3922 result is multibyte if CODING->dst_multibyte is nonzero, else 4054 result is multibyte if CODING->dst_multibyte is nonzero, else
3923 unibyte. */ 4055 unibyte. */
3929 int src_bytes, dst_bytes; 4061 int src_bytes, dst_bytes;
3930 { 4062 {
3931 if (coding->type == coding_type_undecided) 4063 if (coding->type == coding_type_undecided)
3932 detect_coding (coding, source, src_bytes); 4064 detect_coding (coding, source, src_bytes);
3933 4065
3934 if (coding->eol_type == CODING_EOL_UNDECIDED) 4066 if (coding->eol_type == CODING_EOL_UNDECIDED
4067 && coding->type != coding_type_ccl)
3935 detect_eol (coding, source, src_bytes); 4068 detect_eol (coding, source, src_bytes);
3936 4069
3937 coding->produced = coding->produced_char = 0; 4070 coding->produced = coding->produced_char = 0;
3938 coding->consumed = coding->consumed_char = 0; 4071 coding->consumed = coding->consumed_char = 0;
3939 coding->errors = 0; 4072 coding->errors = 0;
3960 decode_coding_emacs_mule (coding, source, destination, 4093 decode_coding_emacs_mule (coding, source, destination,
3961 src_bytes, dst_bytes); 4094 src_bytes, dst_bytes);
3962 break; 4095 break;
3963 4096
3964 case coding_type_ccl: 4097 case coding_type_ccl:
3965 ccl_coding_driver (coding, source, destination, 4098 if (coding->spec.ccl.cr_carryover)
4099 {
4100 /* Set the CR which is not processed by the previous call of
4101 decode_eol_post_ccl in DESTINATION. */
4102 *destination = '\r';
4103 coding->produced++;
4104 coding->produced_char++;
4105 dst_bytes--;
4106 }
4107 ccl_coding_driver (coding, source,
4108 destination + coding->spec.ccl.cr_carryover,
3966 src_bytes, dst_bytes, 0); 4109 src_bytes, dst_bytes, 0);
4110 if (coding->eol_type != CODING_EOL_LF)
4111 decode_eol_post_ccl (coding, destination, coding->produced);
3967 break; 4112 break;
3968 4113
3969 default: 4114 default:
3970 decode_eol (coding, source, destination, src_bytes, dst_bytes); 4115 decode_eol (coding, source, destination, src_bytes, dst_bytes);
3971 } 4116 }
4578 should not leave it undecided because the deeper 4723 should not leave it undecided because the deeper
4579 decoding routine (decode_coding) tries to detect the 4724 decoding routine (decode_coding) tries to detect the
4580 encodings again in vain. */ 4725 encodings again in vain. */
4581 coding->type = coding_type_emacs_mule; 4726 coding->type = coding_type_emacs_mule;
4582 } 4727 }
4583 if (coding->eol_type == CODING_EOL_UNDECIDED) 4728 if (coding->eol_type == CODING_EOL_UNDECIDED
4729 && coding->type != coding_type_ccl)
4584 { 4730 {
4585 saved_coding_symbol = coding->symbol; 4731 saved_coding_symbol = coding->symbol;
4586 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); 4732 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4587 if (coding->eol_type == CODING_EOL_UNDECIDED) 4733 if (coding->eol_type == CODING_EOL_UNDECIDED)
4588 coding->eol_type = CODING_EOL_LF; 4734 coding->eol_type = CODING_EOL_LF;
5036 { 5182 {
5037 detect_coding (coding, XSTRING (str)->data, to_byte); 5183 detect_coding (coding, XSTRING (str)->data, to_byte);
5038 if (coding->type == coding_type_undecided) 5184 if (coding->type == coding_type_undecided)
5039 coding->type = coding_type_emacs_mule; 5185 coding->type = coding_type_emacs_mule;
5040 } 5186 }
5041 if (coding->eol_type == CODING_EOL_UNDECIDED) 5187 if (coding->eol_type == CODING_EOL_UNDECIDED
5188 && coding->type != coding_type_ccl)
5042 { 5189 {
5043 saved_coding_symbol = coding->symbol; 5190 saved_coding_symbol = coding->symbol;
5044 detect_eol (coding, XSTRING (str)->data, to_byte); 5191 detect_eol (coding, XSTRING (str)->data, to_byte);
5045 if (coding->eol_type == CODING_EOL_UNDECIDED) 5192 if (coding->eol_type == CODING_EOL_UNDECIDED)
5046 coding->eol_type = CODING_EOL_LF; 5193 coding->eol_type = CODING_EOL_LF;